feat: add sanitizeTextForRender function

scmmishra · scmmishra · commit 8e5a0a9f47a8 · 2025-09-04T11:28:04.000+05:30
diff --git a/src/index.ts b/src/index.ts
@@ -16,7 +16,7 @@ import { toURL, isSameHost, isValidDomain } from './url';
 
 import { getRecipients } from './email';
 
-import { parseBoolean } from './string';
+import { parseBoolean, sanitizeTextForRender } from './string';
 import {
   sortAsc,
   quantile,
@@ -62,6 +62,7 @@ export {
   parseBoolean,
   quantile,
   replaceVariablesInMessage,
+  sanitizeTextForRender,
   sortAsc,
   splitName,
   toURL,
diff --git a/src/string.ts b/src/string.ts
@@ -16,3 +16,64 @@ export function parseBoolean(candidate: string | number) {
     return false;
   }
 }
+
+/**
+ * Prepares text for HTML rendering by converting newlines and escaping stray angle
+ * brackets, while passing tag-shaped sequences through untouched.
+ *
+ * This function performs the following transformations, in order:
+ * - Converts newline characters (\n) to HTML line breaks (<br>); '\r' is left as-is
+ * - Escapes '<' that does not open a tag-shaped sequence (e.g., "x < 5" → "x &lt; 5")
+ * - Escapes '>' that does not close a tag-shaped sequence (e.g., "x > 5" → "x &gt; 5")
+ * - Preserves anything shaped like a tag, with its attributes (e.g., <div>, <span class="test">, </p>)
+ *
+ * LIMITATIONS: This regex-based approach has known limitations:
+ * - NOT an XSS filter: tag names and attributes are never inspected, so <script> or onclick="..." pass through unchanged
+ * - '&' is never escaped, and '>' inside an attribute value (e.g., <div title="x > 5">) is not handled correctly
+ * - The '>' pattern uses a RegExp lookbehind assertion (ES2018), so the runtime must support lookbehind
+ *
+ * @param {string} text - The text to process. May be null, undefined or an empty string.
+ * @returns {string} The processed text; falsy input (null, undefined, '') is returned unchanged.
+ *
+ * @example
+ * sanitizeTextForRender('Hello\nWorld') // 'Hello<br>World'
+ * sanitizeTextForRender('if x < 5') // 'if x &lt; 5'
+ * sanitizeTextForRender('<div>Hello</div>') // '<div>Hello</div>'
+ * sanitizeTextForRender('Price < $100 <strong>Sale!</strong>') // 'Price &lt; $100 <strong>Sale!</strong>'
+ */
+export function sanitizeTextForRender(text: string | null | undefined) {
+  if (!text) return text; // null, undefined and '' are handed back as-is
+
+  return (
+    text
+      .replace(/\n/g, '<br>') // the inserted '<br>' is tag-shaped, so the passes below leave it intact
+
+      // Escape '<' unless it opens a tag-shaped sequence (opening, closing or self-closing)
+      // Regex breakdown:
+      // <          - matches '<'
+      // (?!        - negative lookahead (not followed by)
+      //   \/?      - optional forward slash for closing tags
+      //   \w+      - one or more word characters (tag name)
+      //   (?:      - non-capturing group for attributes
+      //     \s+    - whitespace before attributes
+      //     [^>]*  - any characters except '>' (attribute content)
+      //   )?       - attributes are optional
+      //   \/?>     - optional self-closing slash, then '>'
+      // )          - end lookahead
+      .replace(/<(?!\/?\w+(?:\s+[^>]*)?\/?>)/g, '&lt;')
+
+      // Escape '>' unless it closes a tag-shaped sequence; runs after the '<' pass, so every '<' still present opens one
+      // Regex breakdown:
+      // (?<!       - negative lookbehind (not preceded by)
+      //   <        - opening '<'
+      //   \/?      - optional forward slash for closing tags
+      //   \w+      - one or more word characters (tag name)
+      //   (?:      - non-capturing group for attributes
+      //     \s+    - whitespace before attributes
+      //     [^>]*  - any characters except '>' (attribute content)
+      //   )?       - attributes are optional
+      // )          - end lookbehind
+      // >          - matches '>'
+      .replace(/(?<!<\/?\w+(?:\s+[^>]*)?)>/g, '&gt;')
+  );
+}
diff --git a/test/string.test.ts b/test/string.test.ts
@@ -1,4 +1,4 @@
-import { parseBoolean } from '../src';
+import { parseBoolean, sanitizeTextForRender } from '../src';
 
 describe('#parseBoolean', () => {
   test('returns true for input "true"', () => {
@@ -37,3 +37,90 @@ describe('#parseBoolean', () => {
     expect(parseBoolean(undefined)).toBe(false);
   });
 });
+
+describe('#sanitizeTextForRender', () => {
+  it('should handle null and undefined values', () => {
+    expect(sanitizeTextForRender(null)).toBe(null);
+    expect(sanitizeTextForRender(undefined)).toBe(undefined);
+    expect(sanitizeTextForRender('')).toBe(''); // empty string is falsy, so it takes the same early return
+  });
+
+  it('should convert newlines to <br> tags', () => {
+    expect(sanitizeTextForRender('Line 1\nLine 2')).toBe('Line 1<br>Line 2');
+    expect(sanitizeTextForRender('Multiple\n\nNewlines')).toBe('Multiple<br><br>Newlines');
+  });
+
+  it('should escape stray < characters', () => {
+    expect(sanitizeTextForRender('if x < 5')).toBe('if x &lt; 5');
+    expect(sanitizeTextForRender('< this is not a tag')).toBe('&lt; this is not a tag');
+    expect(sanitizeTextForRender('price < $100')).toBe('price &lt; $100');
+  });
+
+  it('should escape stray > characters', () => {
+    expect(sanitizeTextForRender('if x > 5')).toBe('if x &gt; 5');
+    expect(sanitizeTextForRender('this is not a tag >')).toBe('this is not a tag &gt;');
+    expect(sanitizeTextForRender('score > 90%')).toBe('score &gt; 90%');
+  });
+
+  it('should escape both stray < and > characters', () => {
+    expect(sanitizeTextForRender('5 < x < 10')).toBe('5 &lt; x &lt; 10');
+    expect(sanitizeTextForRender('x > 5 && y < 10')).toBe('x &gt; 5 && y &lt; 10'); // '&&' stays raw: '&' is never escaped
+  });
+
+  it('should preserve valid HTML tags', () => {
+    expect(sanitizeTextForRender('<div>Hello</div>')).toBe('<div>Hello</div>');
+    expect(sanitizeTextForRender('<span class="test">World</span>')).toBe('<span class="test">World</span>');
+    expect(sanitizeTextForRender('<br>')).toBe('<br>');
+    expect(sanitizeTextForRender('<img src="test.jpg" />')).toBe('<img src="test.jpg" />');
+  });
+
+  it('should preserve nested HTML tags', () => {
+    expect(sanitizeTextForRender('<div><span>Nested</span></div>')).toBe('<div><span>Nested</span></div>');
+    expect(sanitizeTextForRender('<ul><li>Item 1</li><li>Item 2</li></ul>'))
+      .toBe('<ul><li>Item 1</li><li>Item 2</li></ul>');
+  });
+
+  it('should handle mixed content with valid tags and stray characters', () => {
+    expect(sanitizeTextForRender('Price < $100 <strong>on sale</strong>'))
+      .toBe('Price &lt; $100 <strong>on sale</strong>');
+    expect(sanitizeTextForRender('<p>x > 5</p> and y < 10'))
+      .toBe('<p>x &gt; 5</p> and y &lt; 10');
+  });
+
+  it('should handle edge cases with malformed HTML-like content', () => {
+    expect(sanitizeTextForRender('<<invalid>>')).toBe('&lt;<invalid>&gt;'); // inner <invalid> is tag-shaped and kept; only the outer brackets are escaped
+    expect(sanitizeTextForRender('<not a tag')).toBe('&lt;not a tag');
+    expect(sanitizeTextForRender('not a tag>')).toBe('not a tag&gt;');
+  });
+
+  it('should handle email addresses and URLs with angle brackets', () => {
+    expect(sanitizeTextForRender('Contact: <user@example.com>'))
+      .toBe('Contact: &lt;user@example.com&gt;'); // '@' right after the word characters means this is not tag-shaped
+    expect(sanitizeTextForRender('Email me at < user@example.com >'))
+      .toBe('Email me at &lt; user@example.com &gt;');
+  });
+
+  it('should handle mathematical expressions', () => {
+    expect(sanitizeTextForRender('if (x < y && y > z)')).toBe('if (x &lt; y && y &gt; z)');
+    expect(sanitizeTextForRender('array[i] < array[j]')).toBe('array[i] &lt; array[j]');
+  });
+
+  it('should handle HTML entities within valid tags', () => {
+    expect(sanitizeTextForRender('<div>&lt;escaped&gt;</div>'))
+      .toBe('<div>&lt;escaped&gt;</div>'); // existing entities are not double-escaped because '&' is never rewritten
+    expect(sanitizeTextForRender('<span>already &amp; escaped</span>'))
+      .toBe('<span>already &amp; escaped</span>');
+  });
+
+  it('should handle complex real-world email content', () => {
+    const emailContent = `Hello,\n\nThe price is < $50 for items where quantity > 10.\n<p>Best regards,</p>\n<strong>Sales Team</strong>`;
+    const expected = `Hello,<br><br>The price is &lt; $50 for items where quantity &gt; 10.<br><p>Best regards,</p><br><strong>Sales Team</strong>`;
+    expect(sanitizeTextForRender(emailContent)).toBe(expected);
+  });
+
+  it('should handle quoted email content', () => {
+    const quoted = `Original message:\n> User wrote: x < 5\n<blockquote>Previous reply</blockquote>`;
+    const expected = `Original message:<br>&gt; User wrote: x &lt; 5<br><blockquote>Previous reply</blockquote>`;
+    expect(sanitizeTextForRender(quoted)).toBe(expected); // the '>' quote marker follows an inserted <br>, which does not shield it from escaping
+  });
+});