From a0f3637deb4e2bcab3674313dafac21d08348b48 Mon Sep 17 00:00:00 2001
From: EJ Campbell
Date: Thu, 27 Nov 2025 13:15:24 -0800
Subject: [PATCH 1/2] Fix code viewer HTML class attribute corruption
- Replace sequential regex replacements with single-pass tokenizer in
CodeViewer.tsx highlightCode() function
- The old approach applied patterns sequentially, causing later patterns
to match numbers inside class attributes (e.g., "400" in "text-purple-400")
- New approach: build combined regex, iterate matches once, escape HTML
on matched tokens only
- Also fix escapeHtml in formatters.ts to not use document.createElement
(fails during SSR) and simplify formatLargeText to avoid over-formatting
---
web/app/components/CodeViewer.tsx | 75 ++++++++++++++++++++-----------
web/app/utils/formatters.ts | 52 +++++++++------------
2 files changed, 70 insertions(+), 57 deletions(-)
diff --git a/web/app/components/CodeViewer.tsx b/web/app/components/CodeViewer.tsx
index f9f4aae6..a4b0dcfd 100644
--- a/web/app/components/CodeViewer.tsx
+++ b/web/app/components/CodeViewer.tsx
@@ -82,39 +82,64 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) {
const detectedLanguage = language || getLanguageFromFileName(fileName);
- // Basic syntax highlighting for common tokens
+ // Single-pass syntax highlighting to avoid corrupting HTML class attributes
const highlightCode = (code: string): string => {
- // Escape HTML
- let highlighted = code
+ // Escape HTML helper
+ const escapeHtml = (str: string) => str
.replace(/&/g, '&')
.replace(//g, '>');
- // Common patterns for many languages
- const patterns = [
- // Strings
- { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/g, class: 'text-green-400' },
- // Comments
- { regex: /(\/\/.*$)/gm, class: 'text-gray-500 italic' },
- { regex: /(\/\*[\s\S]*?\*\/)/g, class: 'text-gray-500 italic' },
- { regex: /(#.*$)/gm, class: 'text-gray-500 italic' },
- // Numbers
- { regex: /\b(\d+\.?\d*)\b/g, class: 'text-purple-400' },
- // Keywords (common across many languages)
- { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default)\b/g, class: 'text-blue-400' },
- // Boolean and null values
- { regex: /\b(true|false|null|undefined|nil|None|True|False)\b/g, class: 'text-orange-400' },
- // Function calls (basic)
- { regex: /(\w+)(?=\s*\()/g, class: 'text-yellow-400' },
- // Types/Classes (PascalCase)
- { regex: /\b([A-Z][a-zA-Z0-9]*)\b/g, class: 'text-cyan-400' },
+ // Define token patterns with priorities (first match wins)
+ // Order matters: strings and comments first to avoid highlighting inside them
+ const tokenPatterns = [
+ { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/, className: 'text-green-400' }, // strings
+ { regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments
+ { regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments
+ { regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments
+ { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default|fn|pub|mod|use|mut|match|loop|impl|trait|where|type|readonly|override)\b/, className: 'text-blue-400' }, // keywords
+ { regex: /\b(true|false|null|undefined|nil|None|True|False|NULL)\b/, className: 'text-orange-400' }, // literals
+ { regex: /\b\d+\.?\d*\b/, className: 'text-purple-400' }, // numbers
+ { regex: /\b[A-Z][a-zA-Z0-9]*\b/, className: 'text-cyan-400' }, // PascalCase (types/classes)
];
- patterns.forEach(({ regex, class: className }) => {
- highlighted = highlighted.replace(regex, `$&`);
- });
+ // Build a combined regex that matches any token
+ const combinedPattern = new RegExp(
+ tokenPatterns.map(p => `(${p.regex.source})`).join('|'),
+ 'gm'
+ );
- return highlighted;
+ let result = '';
+ let lastIndex = 0;
+
+ // Single pass through the string
+ for (const match of code.matchAll(combinedPattern)) {
+ // Add non-matched text before this match (escaped)
+ if (match.index! > lastIndex) {
+ result += escapeHtml(code.slice(lastIndex, match.index));
+ }
+
+ // Find which pattern matched (first non-undefined capture group)
+ const matchedText = match[0];
+ let className = '';
+ for (let i = 0; i < tokenPatterns.length; i++) {
+ if (match[i + 1] !== undefined) {
+ className = tokenPatterns[i].className;
+ break;
+ }
+ }
+
+ // Add the highlighted token (escape the matched text too)
+ result += `${escapeHtml(matchedText)}`;
+ lastIndex = match.index! + matchedText.length;
+ }
+
+ // Add remaining text after last match
+ if (lastIndex < code.length) {
+ result += escapeHtml(code.slice(lastIndex));
+ }
+
+ return result;
};
const handleCopy = async () => {
diff --git a/web/app/utils/formatters.ts b/web/app/utils/formatters.ts
index 4b02e5a3..a18779e1 100644
--- a/web/app/utils/formatters.ts
+++ b/web/app/utils/formatters.ts
@@ -37,9 +37,12 @@ export function formatJSON(obj: any, maxLength: number = 1000): string {
* Escapes HTML characters to prevent XSS
*/
export function escapeHtml(text: string): string {
- const div = document.createElement('div');
- div.textContent = text;
- return div.innerHTML;
+ return text
+ .replace(/&/g, '&')
+ .replace(//g, '>')
+ .replace(/"/g, '"')
+ .replace(/'/g, ''');
}
/**
@@ -47,39 +50,24 @@ export function escapeHtml(text: string): string {
*/
export function formatLargeText(text: string): string {
if (!text) return '';
-
+
// Escape HTML first
const escaped = escapeHtml(text);
-
- // Format the text with proper spacing and structure
+
+ // Simple, safe formatting - just handle line breaks and basic markdown
return escaped
- // Preserve existing double line breaks
- .replace(/\n\n/g, '
')
- // Convert single line breaks to single
tags
+ // Preserve existing double line breaks as paragraph breaks
+ .replace(/\n\n/g, '
')
+ // Convert single line breaks to
tags
.replace(/\n/g, '
')
- // Format bullet points with modern styling
- .replace(/^(\s*)([-*•])\s+(.+)$/gm, '$1$3')
- // Format numbered lists with modern styling
- .replace(/^(\s*)(\d+)\.\s+(.+)$/gm, '$1$2$3')
- // Format headers with better typography
- .replace(/^([A-Z][^<\n]*:)(
|$)/gm, '
$1
$2')
- // Format code blocks with better styling
- .replace(/\b([A-Z_]{3,})\b/g, '$1')
- // Format file paths and technical terms
- .replace(/\b([a-zA-Z0-9_-]+\.[a-zA-Z]{2,4})\b/g, '$1')
- // Format URLs with modern link styling
- .replace(/(https?:\/\/[^\s<]+)/g, '$1')
- // Format quoted text
- .replace(/^(\s*)([""](.+?)[""])/gm, '$1$3
')
- // Add proper spacing around paragraphs
- .replace(/(
)/g, '')
- // Clean up any excessive spacing
- .replace(/(
\s*){3,}/g, '
')
- // Format emphasis patterns
- .replace(/\*\*([^*]+)\*\*/g, '$1')
- .replace(/\*([^*]+)\*/g, '$1')
- // Format inline code
- .replace(/`([^`]+)`/g, '$1');
+ // Format inline code (backticks)
+ .replace(/`([^`]+)`/g, '$1')
+ // Format bold text
+ .replace(/\*\*([^*]+)\*\*/g, '$1')
+ // Format italic text
+ .replace(/\*([^*]+)\*/g, '$1')
+ // Wrap in paragraph
+ .replace(/^(.*)$/, '$1
');
}
/**
From 77b4d70ca780f17d3a7f943cd9a879546a13b901 Mon Sep 17 00:00:00 2001
From: EJ Campbell
Date: Sat, 20 Dec 2025 09:01:17 -0800
Subject: [PATCH 2/2] Fix Copilot review issues and add tests
- Add quote escaping to escapeHtml in CodeViewer.tsx for XSS protection
- Fix string regex patterns to properly handle escaped quotes
- Use template literal for paragraph wrapping in formatLargeText
- Add vitest and tests for escapeHtml, formatLargeText, and string patterns
---
web/app/components/CodeViewer.test.tsx | 83 +
web/app/components/CodeViewer.tsx | 8 +-
web/app/utils/formatters.test.ts | 76 +
web/app/utils/formatters.ts | 9 +-
web/package-lock.json | 4156 ++++++++++--------------
web/package.json | 7 +-
web/vitest.config.ts | 8 +
7 files changed, 1942 insertions(+), 2405 deletions(-)
create mode 100644 web/app/components/CodeViewer.test.tsx
create mode 100644 web/app/utils/formatters.test.ts
create mode 100644 web/vitest.config.ts
diff --git a/web/app/components/CodeViewer.test.tsx b/web/app/components/CodeViewer.test.tsx
new file mode 100644
index 00000000..be2568ec
--- /dev/null
+++ b/web/app/components/CodeViewer.test.tsx
@@ -0,0 +1,83 @@
+import { describe, it, expect } from 'vitest';
+
+// Test the escapeHtml and string regex patterns used in CodeViewer
+// We test the logic directly since the component uses internal functions
+
+describe('CodeViewer escapeHtml', () => {
+ // Replicate the escapeHtml function from CodeViewer
+ const escapeHtml = (str: string) => str
+ .replace(/&/g, '&')
+ .replace(//g, '>')
+ .replace(/"/g, '"')
+ .replace(/'/g, ''');
+
+ it('escapes double quotes for attribute safety', () => {
+ expect(escapeHtml('class="foo"')).toBe('class="foo"');
+ });
+
+ it('escapes single quotes for attribute safety', () => {
+ expect(escapeHtml("class='foo'")).toBe("class='foo'");
+ });
+
+ it('escapes HTML tags', () => {
+ expect(escapeHtml('')).toBe('<div>');
+ });
+
+ it('escapes ampersands', () => {
+ expect(escapeHtml('a && b')).toBe('a && b');
+ });
+});
+
+describe('CodeViewer string regex patterns', () => {
+ // Test the improved string patterns
+ const doubleQuotePattern = /"(?:[^"\\]|\\.)*"/;
+ const singleQuotePattern = /'(?:[^'\\]|\\.)*'/;
+ const backtickPattern = /`(?:[^`\\]|\\.)*`/;
+
+ describe('double-quoted strings', () => {
+ it('matches simple double-quoted strings', () => {
+ expect('"hello"'.match(doubleQuotePattern)?.[0]).toBe('"hello"');
+ });
+
+ it('matches strings with escaped quotes', () => {
+ expect('"He said \\"hello\\""'.match(doubleQuotePattern)?.[0]).toBe('"He said \\"hello\\""');
+ });
+
+ it('matches strings with escaped backslashes', () => {
+ expect('"path\\\\to\\\\file"'.match(doubleQuotePattern)?.[0]).toBe('"path\\\\to\\\\file"');
+ });
+
+ it('matches empty strings', () => {
+ expect('""'.match(doubleQuotePattern)?.[0]).toBe('""');
+ });
+ });
+
+ describe('single-quoted strings', () => {
+ it('matches simple single-quoted strings', () => {
+ expect("'hello'".match(singleQuotePattern)?.[0]).toBe("'hello'");
+ });
+
+ it('matches strings with escaped quotes', () => {
+ expect("'It\\'s fine'".match(singleQuotePattern)?.[0]).toBe("'It\\'s fine'");
+ });
+
+ it('matches empty strings', () => {
+ expect("''".match(singleQuotePattern)?.[0]).toBe("''");
+ });
+ });
+
+ describe('backtick strings', () => {
+ it('matches simple backtick strings', () => {
+ expect('`hello`'.match(backtickPattern)?.[0]).toBe('`hello`');
+ });
+
+ it('matches strings with escaped backticks', () => {
+ expect('`use \\`code\\``'.match(backtickPattern)?.[0]).toBe('`use \\`code\\``');
+ });
+
+ it('matches empty strings', () => {
+ expect('``'.match(backtickPattern)?.[0]).toBe('``');
+ });
+ });
+});
diff --git a/web/app/components/CodeViewer.tsx b/web/app/components/CodeViewer.tsx
index a4b0dcfd..5f9cc4a3 100644
--- a/web/app/components/CodeViewer.tsx
+++ b/web/app/components/CodeViewer.tsx
@@ -88,12 +88,16 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) {
const escapeHtml = (str: string) => str
.replace(/&/g, '&')
.replace(//g, '>');
+ .replace(/>/g, '>')
+ .replace(/"/g, '"')
+ .replace(/'/g, ''');
// Define token patterns with priorities (first match wins)
// Order matters: strings and comments first to avoid highlighting inside them
const tokenPatterns = [
- { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/, className: 'text-green-400' }, // strings
+ { regex: /"(?:[^"\\]|\\.)*"/, className: 'text-green-400' }, // double-quoted strings
+ { regex: /'(?:[^'\\]|\\.)*'/, className: 'text-green-400' }, // single-quoted strings
+ { regex: /`(?:[^`\\]|\\.)*`/, className: 'text-green-400' }, // backtick strings
{ regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments
{ regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments
{ regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments
diff --git a/web/app/utils/formatters.test.ts b/web/app/utils/formatters.test.ts
new file mode 100644
index 00000000..74a468e6
--- /dev/null
+++ b/web/app/utils/formatters.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from 'vitest';
+import { escapeHtml, formatLargeText } from './formatters';
+
+describe('escapeHtml', () => {
+ it('escapes ampersands', () => {
+ expect(escapeHtml('a & b')).toBe('a & b');
+ });
+
+ it('escapes less than', () => {
+ expect(escapeHtml('a < b')).toBe('a < b');
+ });
+
+ it('escapes greater than', () => {
+ expect(escapeHtml('a > b')).toBe('a > b');
+ });
+
+ it('escapes double quotes', () => {
+ expect(escapeHtml('He said "hello"')).toBe('He said "hello"');
+ });
+
+ it('escapes single quotes', () => {
+ expect(escapeHtml("It's fine")).toBe("It's fine");
+ });
+
+ it('escapes all special characters together', () => {
+ expect(escapeHtml('')).toBe(
+ '<script>"alert('xss')&"</script>'
+ );
+ });
+});
+
+describe('formatLargeText', () => {
+ it('returns empty string for empty input', () => {
+ expect(formatLargeText('')).toBe('');
+ });
+
+ it('wraps simple text in paragraph tags', () => {
+ expect(formatLargeText('Hello world')).toBe('
Hello world
');
+ });
+
+ it('converts single newlines to br tags', () => {
+ expect(formatLargeText('Line1\nLine2')).toBe('
Line1
Line2
');
+ });
+
+ it('converts double newlines to paragraph breaks with proper nesting', () => {
+ const result = formatLargeText('Line1\n\nLine2');
+ expect(result).toBe('
Line1
Line2
');
+ });
+
+ it('handles multiple paragraph breaks correctly', () => {
+ const result = formatLargeText('Para1\n\nPara2\n\nPara3');
+ expect(result).toBe('
Para1
Para2
Para3
');
+ });
+
+ it('escapes HTML in the input', () => {
+ const result = formatLargeText('');
+ expect(result).toContain('<script>');
+ expect(result).not.toContain('