seifghazi · ejc3 · Nov 27, 2025 · Dec 20, 2025 · Copilot · Dec 3, 2025
diff --git a/web/app/components/CodeViewer.test.tsx b/web/app/components/CodeViewer.test.tsx
@@ -0,0 +1,83 @@
+import { describe, it, expect } from 'vitest';
+
+// Test the escapeHtml and string regex patterns used in CodeViewer
+// We test the logic directly since the component uses internal functions
+
+describe('CodeViewer escapeHtml', () => {
+  // Local replica of the escapeHtml helper declared inside CodeViewer's highlightCode.
+  // That helper is internal to the component, so these tests exercise this copy
+  // rather than the component's own function.
+  const escapeHtml = (str: string) => str
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#039;');
+
+  it('escapes double quotes for attribute safety', () => {
+    expect(escapeHtml('class="foo"')).toBe('class=&quot;foo&quot;');
+  });
+
+  it('escapes single quotes for attribute safety', () => {
+    expect(escapeHtml("class='foo'")).toBe("class=&#039;foo&#039;");
+  });
+
+  it('escapes HTML tags', () => {
+    expect(escapeHtml('<div>')).toBe('&lt;div&gt;');
+  });
+
+  it('escapes ampersands', () => {
+    expect(escapeHtml('a && b')).toBe('a &amp;&amp; b');
+  });
+});
+
+describe('CodeViewer string regex patterns', () => {
+  // Local copies of the three string-literal patterns (double quote, single quote, backtick).
+  // Each one matches an opening delimiter, then any run of characters that are either
+  // not the delimiter/backslash or a backslash followed by any character, then the
+  // closing delimiter.
+  const doubleQuotePattern = /"(?:[^"\\]|\\.)*"/;
+  const singleQuotePattern = /'(?:[^'\\]|\\.)*'/;
+  const backtickPattern = /`(?:[^`\\]|\\.)*`/;
+
+  describe('double-quoted strings', () => {
+    it('matches simple double-quoted strings', () => {
+      expect('"hello"'.match(doubleQuotePattern)?.[0]).toBe('"hello"');
+    });
+
+    // In the JS literals below, `\\` is a single backslash in the text under test.
+    it('matches strings with escaped quotes', () => {
+      expect('"He said \\"hello\\""'.match(doubleQuotePattern)?.[0]).toBe('"He said \\"hello\\""');
+    });
+
+    it('matches strings with escaped backslashes', () => {
+      expect('"path\\\\to\\\\file"'.match(doubleQuotePattern)?.[0]).toBe('"path\\\\to\\\\file"');
+    });
+
+    it('matches empty strings', () => {
+      expect('""'.match(doubleQuotePattern)?.[0]).toBe('""');
+    });
+  });
+
+  describe('single-quoted strings', () => {
+    it('matches simple single-quoted strings', () => {
+      expect("'hello'".match(singleQuotePattern)?.[0]).toBe("'hello'");
+    });
+
+    it('matches strings with escaped quotes', () => {
+      expect("'It\\'s fine'".match(singleQuotePattern)?.[0]).toBe("'It\\'s fine'");
+    });
+
+    it('matches empty strings', () => {
+      expect("''".match(singleQuotePattern)?.[0]).toBe("''");
+    });
+  });
+
+  describe('backtick strings', () => {
+    it('matches simple backtick strings', () => {
+      expect('`hello`'.match(backtickPattern)?.[0]).toBe('`hello`');
+    });
+
+    it('matches strings with escaped backticks', () => {
+      expect('`use \\`code\\``'.match(backtickPattern)?.[0]).toBe('`use \\`code\\``');
+    });
+
+    it('matches empty strings', () => {
+      expect('``'.match(backtickPattern)?.[0]).toBe('``');
+    });
+  });
+});
diff --git a/web/app/components/CodeViewer.tsx b/web/app/components/CodeViewer.tsx
@@ -82,39 +82,68 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) {
 
   const detectedLanguage = language || getLanguageFromFileName(fileName);
 
-  // Basic syntax highlighting for common tokens
+  // Single-pass syntax highlighting to avoid corrupting HTML class attributes
   const highlightCode = (code: string): string => {
-    // Escape HTML
-    let highlighted = code
+    // Turns `code` into an HTML string in one pass over the input: every recognized
+    // token is wrapped in <span class="...">, and every slice of text (token or not)
+    // is HTML-escaped as it is emitted. Because the generated markup is never scanned
+    // again, a later pattern cannot match inside an earlier span's class attribute.
+
+    // Escape HTML helper (quotes included, so the output is also attribute-safe)
+    const escapeHtml = (str: string) => str
       .replace(/&/g, '&amp;')
       .replace(/</g, '&lt;')
-      .replace(/>/g, '&gt;');
-
-    // Common patterns for many languages
-    const patterns = [
-      // Strings
-      { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/g, class: 'text-green-400' },
-      // Comments
-      { regex: /(\/\/.*$)/gm, class: 'text-gray-500 italic' },
-      { regex: /(\/\*[\s\S]*?\*\/)/g, class: 'text-gray-500 italic' },
-      { regex: /(#.*$)/gm, class: 'text-gray-500 italic' },
-      // Numbers
-      { regex: /\b(\d+\.?\d*)\b/g, class: 'text-purple-400' },
-      // Keywords (common across many languages)
-      { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default)\b/g, class: 'text-blue-400' },
-      // Boolean and null values
-      { regex: /\b(true|false|null|undefined|nil|None|True|False)\b/g, class: 'text-orange-400' },
-      // Function calls (basic)
-      { regex: /(\w+)(?=\s*\()/g, class: 'text-yellow-400' },
-      // Types/Classes (PascalCase)
-      { regex: /\b([A-Z][a-zA-Z0-9]*)\b/g, class: 'text-cyan-400' },
+      .replace(/>/g, '&gt;')
+      .replace(/"/g, '&quot;')
+      .replace(/'/g, '&#039;');
+
+    // Define token patterns with priorities (first match wins)
+    // Order matters: strings and comments first to avoid highlighting inside them
+    // IMPORTANT: a pattern must not contain capturing groups of its own (use (?:...)).
+    // Each pattern is wrapped in exactly one capturing group below, and capture
+    // group i + 1 is assumed to belong to tokenPatterns[i]; an inner capturing group
+    // would shift the numbering and assign the wrong class to every later pattern.
+    const tokenPatterns = [
+      { regex: /"(?:[^"\\]|\\.)*"/, className: 'text-green-400' },                 // double-quoted strings
+      { regex: /'(?:[^'\\]|\\.)*'/, className: 'text-green-400' },                 // single-quoted strings
+      { regex: /`(?:[^`\\]|\\.)*`/, className: 'text-green-400' },                 // backtick strings
+      { regex: /\/\/.*$/, className: 'text-gray-500 italic' },                     // single-line comments
+      { regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' },           // multi-line comments
+      { regex: /#.*$/, className: 'text-gray-500 italic' },                        // hash comments
+      { regex: /\b(?:function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default|fn|pub|mod|use|mut|match|loop|impl|trait|where|type|readonly|override)\b/, className: 'text-blue-400' }, // keywords
+      { regex: /\b(?:true|false|null|undefined|nil|None|True|False|NULL)\b/, className: 'text-orange-400' }, // literals
+      { regex: /\b\d+(?:\.\d+)?\b/, className: 'text-purple-400' },                // numbers
+      { regex: /\b[A-Z][a-zA-Z0-9]*\b/, className: 'text-cyan-400' },              // PascalCase (types/classes)
     ];
 
-    patterns.forEach(({ regex, class: className }) => {
-      highlighted = highlighted.replace(regex, `<span class="${className}">$&</span>`);
-    });
+    // Build a combined regex that matches any token: one capturing group per pattern,
+    // joined as alternatives. 'g' is required by matchAll; 'm' lets `$` in the
+    // comment patterns match at the end of each line.
+    const combinedPattern = new RegExp(
+      tokenPatterns.map(p => `(${p.regex.source})`).join('|'),
+      'gm'
+    );
+
+    let result = '';
+    let lastIndex = 0;
+
+    // Single pass through the string
+    for (const match of code.matchAll(combinedPattern)) {
+      const matchedText = match[0];
+      // matchAll always provides an index; the fallback only satisfies the type checker
+      const start = match.index ?? lastIndex;
+
+      // Add non-matched text before this match (escaped)
+      if (start > lastIndex) {
+        result += escapeHtml(code.slice(lastIndex, start));
+      }
+
+      // Find which pattern matched: the one whose capture group is defined
+      const className =
+        tokenPatterns.find((_, i) => match[i + 1] !== undefined)?.className ?? '';
+
+      // Add the highlighted token (escape the matched text too)
+      result += `<span class="${className}">${escapeHtml(matchedText)}</span>`;
+      lastIndex = start + matchedText.length;
+    }
+
+    // Add remaining text after last match
+    if (lastIndex < code.length) {
+      result += escapeHtml(code.slice(lastIndex));
+    }
 
-    return highlighted;
+    return result;
   };
 
   const handleCopy = async () => {

diff --git a/web/app/utils/formatters.test.ts b/web/app/utils/formatters.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from 'vitest';
+import { escapeHtml, formatLargeText } from './formatters';
+
+// One case per character that escapeHtml replaces (&, <, >, ", '), followed by a
+// combined case that mixes all five in a script-tag payload.
+describe('escapeHtml', () => {
+  it('escapes ampersands', () => {
+    expect(escapeHtml('a & b')).toBe('a &amp; b');
+  });
+
+  it('escapes less than', () => {
+    expect(escapeHtml('a < b')).toBe('a &lt; b');
+  });
+
+  it('escapes greater than', () => {
+    expect(escapeHtml('a > b')).toBe('a &gt; b');
+  });
+
+  it('escapes double quotes', () => {
+    expect(escapeHtml('He said "hello"')).toBe('He said &quot;hello&quot;');
+  });
+
+  it('escapes single quotes', () => {
+    expect(escapeHtml("It's fine")).toBe("It&#039;s fine");
+  });
+
+  it('escapes all special characters together', () => {
+    expect(escapeHtml('<script>"alert(\'xss\')&"</script>')).toBe(
+      '&lt;script&gt;&quot;alert(&#039;xss&#039;)&amp;&quot;&lt;/script&gt;'
+    );
+  });
+});
+
+// Expected output shape, as pinned by the cases below: '' for empty input, otherwise
+// the text wrapped in <p>...</p>, with "\n" rendered as <br> and "\n\n" closing the
+// current paragraph and opening a new <p class="mt-3">.
+describe('formatLargeText', () => {
+  it('returns empty string for empty input', () => {
+    expect(formatLargeText('')).toBe('');
+  });
+
+  it('wraps simple text in paragraph tags', () => {
+    expect(formatLargeText('Hello world')).toBe('<p>Hello world</p>');
+  });
+
+  it('converts single newlines to br tags', () => {
+    expect(formatLargeText('Line1\nLine2')).toBe('<p>Line1<br>Line2</p>');
+  });
+
+  it('converts double newlines to paragraph breaks with proper nesting', () => {
+    const result = formatLargeText('Line1\n\nLine2');
+    expect(result).toBe('<p>Line1</p><p class="mt-3">Line2</p>');
+  });
+
+  it('handles multiple paragraph breaks correctly', () => {
+    const result = formatLargeText('Para1\n\nPara2\n\nPara3');
+    expect(result).toBe('<p>Para1</p><p class="mt-3">Para2</p><p class="mt-3">Para3</p>');
+  });
+
+  it('escapes HTML in the input', () => {
+    const result = formatLargeText('<script>alert("xss")</script>');
+    expect(result).toContain('&lt;script&gt;');
+    expect(result).not.toContain('<script>');
+  });
+
+  // Only the start of the opening tag is asserted, so the check does not depend on
+  // whatever attributes the <code> element is given.
+  it('formats inline code with backticks', () => {
+    const result = formatLargeText('Use `code` here');
+    expect(result).toContain('<code');
+    expect(result).toContain('>code</code>');
+  });
+
+  it('formats bold text', () => {
+    const result = formatLargeText('This is **bold** text');
+    expect(result).toContain('<strong>bold</strong>');
+  });
+
+  it('formats italic text', () => {
+    const result = formatLargeText('This is *italic* text');
+    expect(result).toContain('<em>italic</em>');
+  });
+});
diff --git a/web/app/utils/formatters.ts b/web/app/utils/formatters.ts
@@ -37,49 +37,38 @@ export function formatJSON(obj: any, maxLength: number = 1000): string {
  * Escapes HTML characters to prevent XSS
  */
 export function escapeHtml(text: string): string {
-  const div = document.createElement('div');
-  div.textContent = text;
-  return div.innerHTML;
+  // Plain string replacement; no `document` access is needed.
+  // '&' is replaced first so the ampersands introduced by the entity
+  // replacements that follow are not escaped a second time.
+  return text
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#039;');
 }
 
 /**
  * Formats large text with proper line breaks and structure, optimized for the new conversation flow
  */
 export function formatLargeText(text: string): string {
+  // Returns an HTML string for `text`: '' when `text` is empty, otherwise the
+  // HTML-escaped text wrapped in <p>...</p> with line breaks and a small subset of
+  // inline markdown (`code`, **bold**, *italic*) converted to tags.
   if (!text) return '';
-  
+
   // Escape HTML first
   const escaped = escapeHtml(text);
-  
-  // Format the text with proper spacing and structure
-  return escaped
-    // Preserve existing double line breaks
-    .replace(/\n\n/g, '<br><br>')
-    // Convert single line breaks to single <br> tags
+
+  // Simple, safe formatting - just handle line breaks and basic markdown.
+  // All markup below is inserted after escaping, so the only tags in the result
+  // are the ones written in these replacement strings.
+  const formatted = escaped
+    // Preserve existing double line breaks as paragraph breaks
+    .replace(/\n\n/g, '</p><p class="mt-3">')
+    // Convert single line breaks to <br> tags
     .replace(/\n/g, '<br>')
-    // Format bullet points with modern styling
-    .replace(/^(\s*)([-*•])\s+(.+)$/gm, '$1<span class="inline-flex items-center space-x-2"><span class="w-1.5 h-1.5 bg-blue-500 rounded-full flex-shrink-0"></span><span>$3</span></span>')
-    // Format numbered lists with modern styling
-    .replace(/^(\s*)(\d+)\.\s+(.+)$/gm, '$1<span class="inline-flex items-center space-x-2"><span class="w-5 h-5 bg-blue-100 text-blue-700 rounded-full flex items-center justify-center text-xs font-semibold">$2</span><span>$3</span></span>')
-    // Format headers with better typography
-    .replace(/^([A-Z][^<\n]*:)(<br>|$)/gm, '<div class="font-semibold text-gray-900 mt-4 mb-2 border-b border-gray-200 pb-1">$1</div>$2')
-    // Format code blocks with better styling
-    .replace(/\b([A-Z_]{3,})\b/g, '<code class="bg-gradient-to-r from-gray-100 to-blue-50 border border-gray-200 px-2 py-0.5 rounded-md text-xs text-blue-700 font-mono font-medium">$1</code>')
-    // Format file paths and technical terms
-    .replace(/\b([a-zA-Z0-9_-]+\.[a-zA-Z]{2,4})\b/g, '<span class="bg-slate-100 text-slate-700 px-1.5 py-0.5 rounded text-xs font-mono border border-slate-200">$1</span>')
-    // Format URLs with modern link styling
-    .replace(/(https?:\/\/[^\s<]+)/g, '<a href="$1" class="text-blue-600 hover:text-blue-800 underline underline-offset-2 decoration-blue-300 hover:decoration-blue-500 transition-colors font-medium" target="_blank" rel="noopener noreferrer">$1</a>')
-    // Format quoted text
-    .replace(/^(\s*)([""](.+?)[""])/gm, '$1<blockquote class="border-l-4 border-blue-200 bg-blue-50 pl-4 py-2 my-2 italic text-gray-700 rounded-r">$3</blockquote>')
-    // Add proper spacing around paragraphs
-    .replace(/(<br><br>)/g, '<div class="my-4"></div>')
-    // Clean up any excessive spacing
-    .replace(/(<br>\s*){3,}/g, '<br><br>')
-    // Format emphasis patterns
-    .replace(/\*\*([^*]+)\*\*/g, '<strong class="font-semibold text-gray-900">$1</strong>')
-    .replace(/\*([^*]+)\*/g, '<em class="italic text-gray-700">$1</em>')
-    // Format inline code
-    .replace(/`([^`]+)`/g, '<code class="bg-gray-100 text-gray-800 px-1.5 py-0.5 rounded text-sm font-mono border border-gray-200">$1</code>');
+    // Format inline code (backticks)
+    .replace(/`([^`]+)`/g, '<code class="bg-gray-100 text-gray-800 px-1.5 py-0.5 rounded text-sm font-mono">$1</code>')
+    // Format bold text (must run before italics so "**" is not consumed as two "*")
+    .replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>')
+    // Format italic text. The emphasized text must start and end with a character
+    // that is neither whitespace nor '*', so a lone asterisk ("* item", "2 * 3")
+    // is left as-is instead of being paired with the next asterisk in the text.
+    .replace(/\*([^\s*](?:[^*]*[^\s*])?)\*/g, '<em>$1</em>');
+
+  // Wrap in paragraph tags; pairs with the '</p><p class="mt-3">' separators above
+  return `<p>${formatted}</p>`;
 }
 
 /**