feat(TextPainter): improve support for strings containing ansi sequences

Im-Beast · Im-Beast · commit 54e6ebae63d6 · 2023-09-15T19:30:14.000+02:00
diff --git a/examples/demo.ts b/examples/demo.ts
@@ -508,8 +508,8 @@ const performanceStats = new Label({
   theme: baseTheme,
   text: new Computed(() =>
     `\
-FPS: ${fps.value.toFixed(2)}\
- | Components: ${tui.components.size}\
+${crayon.bgRed.green(`FPS: ${fps.value.toFixed(2)}`)}\
+ | ${crayon.yellow(`Components: ${tui.components.size}`)}\
  | Drawn objects: ${tui.canvas.painters.length}\
  | Updated objects: ${tui.canvas.rerenderedObjects}\
  | Press CTRL+F to toggle Frame/Label visibility`
diff --git a/src/canvas/painters/text.ts b/src/canvas/painters/text.ts
@@ -8,9 +8,10 @@ import { Dependency, Subscription } from "../../signals/types.ts";
 import { Effect } from "../../signals/effect.ts";
 import {
   cropToWidth,
+  detectMultiCodePointCharactersUsage,
   getMultiCodePointCharacters,
+  reapplyCharacterStyles,
   textWidth,
-  usesMultiCodePointCharacters,
 } from "../../utils/strings.ts";
 import { jinkReactiveObject, unjinkReactiveObject } from "../../signals/reactivity.ts";
 import { fitsInRectangle, rectangleEquals, rectangleIntersection } from "../../utils/numbers.ts";
@@ -78,7 +79,7 @@ export class TextPainter extends Painter<"text"> {
     this.alignHorizontally = signalify(options.alignHorizontally ?? 0);
 
     this.multiCodePointSupport = signalify(
-      options.multiCodePointSupport ?? usesMultiCodePointCharacters(this.text.peek()),
+      options.multiCodePointSupport ?? detectMultiCodePointCharactersUsage(this.text.peek()),
     );
     this.overwriteRectangle = signalify(options.overwriteRectangle ?? false);
 
@@ -184,7 +185,9 @@ export class TextPainter extends Painter<"text"> {
           }
 
           if (multiCodePointSupport) {
-            alignedLine = getMultiCodePointCharacters(alignedLine);
+            alignedLine = reapplyCharacterStyles(
+              getMultiCodePointCharacters(alignedLine),
+            );
           }
 
           if (Array.isArray(alignedLine)) {
diff --git a/src/components/label.ts b/src/components/label.ts
@@ -4,7 +4,7 @@ import { TextPainter } from "../canvas/painters/text.ts";
 import { Computed, Signal, SignalOfObject } from "../signals/mod.ts";
 
 import { signalify } from "../utils/signals.ts";
-import { splitToArray } from "../utils/strings.ts";
+import { detectMultiCodePointCharactersUsage, splitToArray } from "../utils/strings.ts";
 import { Rectangle } from "../types.ts";
 
 /**
@@ -111,7 +111,9 @@ export class Label extends Component {
 
     this.text = signalify(options.text);
     this.overwriteRectangle = signalify(options.overwriteRectangle ?? false);
-    this.multiCodePointSupport = signalify(options.multiCodePointSupport ?? false);
+    this.multiCodePointSupport = signalify(
+      options.multiCodePointSupport ?? detectMultiCodePointCharactersUsage(this.text.peek()),
+    );
     this.align = signalify(
       options.align ?? {
         vertical: 0,
diff --git a/src/utils/strings.ts b/src/utils/strings.ts
@@ -8,20 +8,18 @@
 export const UNICODE_CHAR_REGEXP =
   /\ud83c[\udffb-\udfff](?=\ud83c[\udffb-\udfff])|(?:(?:\ud83c\udff4\udb40\udc67\udb40\udc62\udb40(?:\udc65|\udc73|\udc77)\udb40(?:\udc6e|\udc63|\udc6c)\udb40(?:\udc67|\udc74|\udc73)\udb40\udc7f)|[^\ud800-\udfff][\u0300-\u036f\ufe20-\ufe2f\u20d0-\u20ff\u1ab0-\u1aff\u1dc0-\u1dff]?|[\u0300-\u036f\ufe20-\ufe2f\u20d0-\u20ff\u1ab0-\u1aff\u1dc0-\u1dff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff]|[\ud800-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe2f\u20d0-\u20ff\u1ab0-\u1aff\u1dc0-\u1dff]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe2f\u20d0-\u20ff\u1ab0-\u1aff\u1dc0-\u1dff]|\ud83c[\udffb-\udfff])?)*/g;
 
-export function usesMultiCodePointCharacters(text: string | string[]): boolean {
-  if (!text) {
-    return false;
-  }
-
-  if (Array.isArray(text)) {
+export function detectMultiCodePointCharactersUsage(text: string | string[]): boolean {
+  if (!text) return false;
+  else if (text.includes("\x1b")) return true;
+  else if (Array.isArray(text)) {
     for (const line of text) {
       if (getMultiCodePointCharacters(line).length === line.length) {
         return true;
       }
     }
+
     return false;
   }
-
   return getMultiCodePointCharacters(text).length === text.length;
 }
 
@@ -31,52 +29,102 @@ export function getMultiCodePointCharacters(text: string): string[] {
   if (!text) return empty;
   const matched = text.match(UNICODE_CHAR_REGEXP);
 
-  if (matched?.includes("\x1b")) {
-    const arr: string[] = [];
-    let i = 0;
-    let ansi = 0;
-    let lastStyle = "";
-    for (const char of matched) {
-      arr[i] ??= "";
-      arr[i] += lastStyle + char;
+  return matched ?? empty;
+}
+
+/**
+ * Prefixes every visible character with the ANSI style sequences that precede it.
+ * The given array is modified in place and returned; no new array is created.
+ *
+ * @example
+ * ```ts
+ * console.log(reapplyCharacterStyles(getMultiCodePointCharacters("\x1b[32mHi"))); // ["\x1b[32mH", "\x1b[32mi"]
+ * ```
+ *
+ * @example
+ * ```ts
+ * const arr = ["\x1b", "[", "3", "2", "m", "H", "i"];
+ * console.log(reapplyCharacterStyles(arr)); // ["\x1b[32mH", "\x1b[32mi"];
+ * console.log(arr);  // ["\x1b[32mH", "\x1b[32mi"];
+ * ```
+ */
+export function reapplyCharacterStyles(text: string[]): string[] {
+  // Heuristic for skipping reapplying when text doesn't include introducer
+  if (!text.includes("\x1b")) {
+    return text;
+  }
+
+  let i = 0;
+  let ansi = 0;
+  let lastStyle = "";
+  let flushStyle = false;
 
-      if (char === "\x1b") {
+  for (const char of text) {
+    if (char === "\x1b") {
+      // possible start of an ansi sequence
+      ++ansi;
+    } else if (ansi === 1) {
+      // confirm whether ansi sequence has been started
+      if (char === "[") {
+        lastStyle += "\x1b" + char;
         ++ansi;
-        lastStyle += "\x1b";
-      } else if (ansi) {
-        lastStyle += char;
+      } else {
+        ansi = 0;
+      }
+    } else if (ansi > 1) {
+      lastStyle += char;
+
+      const isFinalByte = isFinalAnsiByte(char);
 
-        if (ansi === 3 && char === "m" && lastStyle[lastStyle.length - 2] === "0") {
+      if (isFinalByte) {
+        flushStyle = true;
+
+        // End of ansi sequence
+        if (ansi === 3 && lastStyle[lastStyle.length - 2] === "0") {
+          // Style is "\x1b[0m" – no need to store the last style when all of them got cleared
           lastStyle = "";
         }
 
-        if (char === "m") {
-          ansi = 0;
-        } else {
-          ++ansi;
-        }
+        ansi = 0;
       } else {
-        ++i;
+        // Part of an ansi sequence
+        ++ansi;
+      }
+    } else {
+      if (flushStyle) {
+        text[i] = lastStyle + char;
       }
+
+      ++i;
     }
+  }
 
-    return arr;
+  if (text.length > i) {
+    while (text.length > i) {
+      text.pop();
+    }
   }
 
-  return matched ?? empty;
+  return text;
+}
+
+export function isFinalAnsiByte(character: string): boolean {
+  const codePoint = character.charCodeAt(0);
+  // don't include 0x70–0x7E range because it's considered "private"
+  return codePoint >= 0x40 && codePoint < 0x70;
 }
 
-/** Strips string of all its styles */
-export function stripStyles(string: string): string {
+/** Strips text of all its styles */
+export function stripStyles(text: string): string {
   let stripped = "";
   let ansi = false;
-  const len = string.length;
+  const len = text.length;
   for (let i = 0; i < len; ++i) {
-    const char = string[i];
+    const char = text[i];
     if (char === "\x1b") {
       ansi = true;
       i += 2; // [ "\x1b" "[" "X" "m" ] <-- shortest ansi sequence
-    } else if (char === "m" && ansi) {
+    } else if (ansi && isFinalAnsiByte(char)) {
       ansi = false;
     } else if (!ansi) {
       stripped += char;
@@ -85,9 +133,9 @@ export function stripStyles(string: string): string {
   return stripped;
 }
 
-/** Inserts {value} into {string} on given {index} */
-export function insertAt(string: string, index: number, value: string): string {
-  return string.slice(0, index) + value + string.slice(index);
+/** Inserts {value} into {text} on given {index} */
+export function insertAt(text: string, index: number, value: string): string {
+  return text.slice(0, index) + value + text.slice(index);
 }
 
 /** Returns real {text} width */
@@ -102,7 +150,7 @@ export function textWidth(text: string, start = 0): number {
     if (char === "\x1b") {
       ansi = true;
       i += 2; // [ "\x1b" "[" "X" "m" ] <-- shortest ansi sequence
-    } else if (char === "m" && ansi) {
+    } else if (ansi && isFinalAnsiByte(char)) {
       ansi = false;
     } else if (!ansi) {
       width += characterWidth(char);
diff --git a/tests/utils/strings.test.ts b/tests/utils/strings.test.ts
@@ -1,6 +1,14 @@
 // Copyright 2023 Im-Beast. All rights reserved. MIT license.
 
-import { characterWidth, insertAt, stripStyles, textWidth, UNICODE_CHAR_REGEXP } from "../../src/utils/strings.ts";
+import { getMultiCodePointCharacters } from "../../mod.ts";
+import {
+  characterWidth,
+  insertAt,
+  reapplyCharacterStyles,
+  stripStyles,
+  textWidth,
+  UNICODE_CHAR_REGEXP,
+} from "../../src/utils/strings.ts";
 import { assertEquals } from "../deps.ts";
 
 const unicodeString = "♥☭👀f🌏g⚠5✌💢✅💛🌻";
@@ -38,4 +46,50 @@ Deno.test("utils/strings.ts", async (t) => {
     assertEquals(textWidth(fullWidths.join("")), fullWidths.length * 2);
     assertEquals(textWidth("Hello"), 5);
   });
+
+  await t.step("getMultiCodePointCharacters()", () => {
+    assertEquals(getMultiCodePointCharacters("dog"), ["d", "o", "g"]);
+    assertEquals(getMultiCodePointCharacters("\x1b[32mHi\x1b[0m"), [
+      "\x1b",
+      "[",
+      "3",
+      "2",
+      "m",
+      "H",
+      "i",
+      "\x1b",
+      "[",
+      "0",
+      "m",
+    ]);
+  });
+
+  await t.step("reapplyCharacterStyles()", () => {
+    assertEquals(
+      reapplyCharacterStyles(
+        getMultiCodePointCharacters("dog"),
+      ),
+      ["d", "o", "g"],
+    );
+
+    assertEquals(
+      reapplyCharacterStyles(
+        getMultiCodePointCharacters("\x1b[32mHello world!"),
+      ),
+      [
+        "\x1b[32mH",
+        "\x1b[32me",
+        "\x1b[32ml",
+        "\x1b[32ml",
+        "\x1b[32mo",
+        "\x1b[32m ",
+        "\x1b[32mw",
+        "\x1b[32mo",
+        "\x1b[32mr",
+        "\x1b[32ml",
+        "\x1b[32md",
+        "\x1b[32m!",
+      ],
+    );
+  });
 });