From a0f3637deb4e2bcab3674313dafac21d08348b48 Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Thu, 27 Nov 2025 13:15:24 -0800 Subject: [PATCH 1/7] Fix code viewer HTML class attribute corruption - Replace sequential regex replacements with single-pass tokenizer in CodeViewer.tsx highlightCode() function - The old approach applied patterns sequentially, causing later patterns to match numbers inside class attributes (e.g., "400" in "text-purple-400") - New approach: build combined regex, iterate matches once, escape HTML on matched tokens only - Also fix escapeHtml in formatters.ts to not use document.createElement (fails during SSR) and simplify formatLargeText to avoid over-formatting --- web/app/components/CodeViewer.tsx | 75 ++++++++++++++++++++----------- web/app/utils/formatters.ts | 52 +++++++++------------ 2 files changed, 70 insertions(+), 57 deletions(-) diff --git a/web/app/components/CodeViewer.tsx b/web/app/components/CodeViewer.tsx index f9f4aae6..a4b0dcfd 100644 --- a/web/app/components/CodeViewer.tsx +++ b/web/app/components/CodeViewer.tsx @@ -82,39 +82,64 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) { const detectedLanguage = language || getLanguageFromFileName(fileName); - // Basic syntax highlighting for common tokens + // Single-pass syntax highlighting to avoid corrupting HTML class attributes const highlightCode = (code: string): string => { - // Escape HTML - let highlighted = code + // Escape HTML helper + const escapeHtml = (str: string) => str .replace(/&/g, '&') .replace(//g, '>'); - // Common patterns for many languages - const patterns = [ - // Strings - { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/g, class: 'text-green-400' }, - // Comments - { regex: /(\/\/.*$)/gm, class: 'text-gray-500 italic' }, - { regex: /(\/\*[\s\S]*?\*\/)/g, class: 'text-gray-500 italic' }, - { regex: /(#.*$)/gm, class: 'text-gray-500 italic' }, - // Numbers - { regex: /\b(\d+\.?\d*)\b/g, class: 'text-purple-400' }, - // Keywords (common across many languages) - { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default)\b/g, class: 'text-blue-400' }, - // Boolean and null values - { regex: /\b(true|false|null|undefined|nil|None|True|False)\b/g, class: 'text-orange-400' }, - // Function calls (basic) - { regex: /(\w+)(?=\s*\()/g, class: 'text-yellow-400' }, - // Types/Classes (PascalCase) - { regex: /\b([A-Z][a-zA-Z0-9]*)\b/g, class: 'text-cyan-400' }, + // Define token patterns with priorities (first match wins) + // Order matters: strings and comments first to avoid highlighting inside them + const tokenPatterns = [ + { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/, className: 'text-green-400' }, // strings + { regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments + { regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments + { regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments + { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default|fn|pub|mod|use|mut|match|loop|impl|trait|where|type|readonly|override)\b/, className: 'text-blue-400' }, // keywords + { regex: /\b(true|false|null|undefined|nil|None|True|False|NULL)\b/, className: 'text-orange-400' }, // literals + { regex: /\b\d+\.?\d*\b/, className: 'text-purple-400' }, // numbers + { regex: /\b[A-Z][a-zA-Z0-9]*\b/, className: 'text-cyan-400' }, // PascalCase (types/classes) ]; - patterns.forEach(({ regex, class: className }) => { - highlighted = highlighted.replace(regex, `$&`); - }); + // Build a combined regex that matches any token + const combinedPattern = new RegExp( + tokenPatterns.map(p => `(${p.regex.source})`).join('|'), + 'gm' + ); - return highlighted; + let result = ''; + let lastIndex = 0; + + // Single pass through the string + for (const match of code.matchAll(combinedPattern)) { + // Add non-matched text before this match (escaped) + if (match.index! > lastIndex) { + result += escapeHtml(code.slice(lastIndex, match.index)); + } + + // Find which pattern matched (first non-undefined capture group) + const matchedText = match[0]; + let className = ''; + for (let i = 0; i < tokenPatterns.length; i++) { + if (match[i + 1] !== undefined) { + className = tokenPatterns[i].className; + break; + } + } + + // Add the highlighted token (escape the matched text too) + result += `${escapeHtml(matchedText)}`; + lastIndex = match.index! + matchedText.length; + } + + // Add remaining text after last match + if (lastIndex < code.length) { + result += escapeHtml(code.slice(lastIndex)); + } + + return result; }; const handleCopy = async () => { diff --git a/web/app/utils/formatters.ts b/web/app/utils/formatters.ts index 4b02e5a3..a18779e1 100644 --- a/web/app/utils/formatters.ts +++ b/web/app/utils/formatters.ts @@ -37,9 +37,12 @@ export function formatJSON(obj: any, maxLength: number = 1000): string { * Escapes HTML characters to prevent XSS */ export function escapeHtml(text: string): string { - const div = document.createElement('div'); - div.textContent = text; - return div.innerHTML; + return text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); } /** @@ -47,39 +50,24 @@ export function escapeHtml(text: string): string { */ export function formatLargeText(text: string): string { if (!text) return ''; - + // Escape HTML first const escaped = escapeHtml(text); - - // Format the text with proper spacing and structure + + // Simple, safe formatting - just handle line breaks and basic markdown return escaped - // Preserve existing double line breaks - .replace(/\n\n/g, '

') - // Convert single line breaks to single
tags + // Preserve existing double line breaks as paragraph breaks + .replace(/\n\n/g, '

') + // Convert single line breaks to
tags .replace(/\n/g, '
') - // Format bullet points with modern styling - .replace(/^(\s*)([-*•])\s+(.+)$/gm, '$1$3') - // Format numbered lists with modern styling - .replace(/^(\s*)(\d+)\.\s+(.+)$/gm, '$1$2$3') - // Format headers with better typography - .replace(/^([A-Z][^<\n]*:)(
|$)/gm, '

$1
$2') - // Format code blocks with better styling - .replace(/\b([A-Z_]{3,})\b/g, '$1') - // Format file paths and technical terms - .replace(/\b([a-zA-Z0-9_-]+\.[a-zA-Z]{2,4})\b/g, '$1') - // Format URLs with modern link styling - .replace(/(https?:\/\/[^\s<]+)/g, '$1') - // Format quoted text - .replace(/^(\s*)([""](.+?)[""])/gm, '$1
$3
') - // Add proper spacing around paragraphs - .replace(/(

)/g, '
') - // Clean up any excessive spacing - .replace(/(
\s*){3,}/g, '

') - // Format emphasis patterns - .replace(/\*\*([^*]+)\*\*/g, '$1') - .replace(/\*([^*]+)\*/g, '$1') - // Format inline code - .replace(/`([^`]+)`/g, '$1'); + // Format inline code (backticks) + .replace(/`([^`]+)`/g, '$1') + // Format bold text + .replace(/\*\*([^*]+)\*\*/g, '$1') + // Format italic text + .replace(/\*([^*]+)\*/g, '$1') + // Wrap in paragraph + .replace(/^(.*)$/, '

$1

'); } /** From 77b4d70ca780f17d3a7f943cd9a879546a13b901 Mon Sep 17 00:00:00 2001 From: EJ Campbell Date: Sat, 20 Dec 2025 09:01:17 -0800 Subject: [PATCH 2/7] Fix Copilot review issues and add tests - Add quote escaping to escapeHtml in CodeViewer.tsx for XSS protection - Fix string regex patterns to properly handle escaped quotes - Use template literal for paragraph wrapping in formatLargeText - Add vitest and tests for escapeHtml, formatLargeText, and string patterns --- web/app/components/CodeViewer.test.tsx | 83 + web/app/components/CodeViewer.tsx | 8 +- web/app/utils/formatters.test.ts | 76 + web/app/utils/formatters.ts | 9 +- web/package-lock.json | 4156 ++++++++++-------------- web/package.json | 7 +- web/vitest.config.ts | 8 + 7 files changed, 1942 insertions(+), 2405 deletions(-) create mode 100644 web/app/components/CodeViewer.test.tsx create mode 100644 web/app/utils/formatters.test.ts create mode 100644 web/vitest.config.ts diff --git a/web/app/components/CodeViewer.test.tsx b/web/app/components/CodeViewer.test.tsx new file mode 100644 index 00000000..be2568ec --- /dev/null +++ b/web/app/components/CodeViewer.test.tsx @@ -0,0 +1,83 @@ +import { describe, it, expect } from 'vitest'; + +// Test the escapeHtml and string regex patterns used in CodeViewer +// We test the logic directly since the component uses internal functions + +describe('CodeViewer escapeHtml', () => { + // Replicate the escapeHtml function from CodeViewer + const escapeHtml = (str: string) => str + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + + it('escapes double quotes for attribute safety', () => { + expect(escapeHtml('class="foo"')).toBe('class="foo"'); + }); + + it('escapes single quotes for attribute safety', () => { + expect(escapeHtml("class='foo'")).toBe("class='foo'"); + }); + + it('escapes HTML tags', () => { + expect(escapeHtml('
')).toBe('<div>'); + }); + + it('escapes ampersands', () => { + expect(escapeHtml('a && b')).toBe('a && b'); + }); +}); + +describe('CodeViewer string regex patterns', () => { + // Test the improved string patterns + const doubleQuotePattern = /"(?:[^"\\]|\\.)*"/; + const singleQuotePattern = /'(?:[^'\\]|\\.)*'/; + const backtickPattern = /`(?:[^`\\]|\\.)*`/; + + describe('double-quoted strings', () => { + it('matches simple double-quoted strings', () => { + expect('"hello"'.match(doubleQuotePattern)?.[0]).toBe('"hello"'); + }); + + it('matches strings with escaped quotes', () => { + expect('"He said \\"hello\\""'.match(doubleQuotePattern)?.[0]).toBe('"He said \\"hello\\""'); + }); + + it('matches strings with escaped backslashes', () => { + expect('"path\\\\to\\\\file"'.match(doubleQuotePattern)?.[0]).toBe('"path\\\\to\\\\file"'); + }); + + it('matches empty strings', () => { + expect('""'.match(doubleQuotePattern)?.[0]).toBe('""'); + }); + }); + + describe('single-quoted strings', () => { + it('matches simple single-quoted strings', () => { + expect("'hello'".match(singleQuotePattern)?.[0]).toBe("'hello'"); + }); + + it('matches strings with escaped quotes', () => { + expect("'It\\'s fine'".match(singleQuotePattern)?.[0]).toBe("'It\\'s fine'"); + }); + + it('matches empty strings', () => { + expect("''".match(singleQuotePattern)?.[0]).toBe("''"); + }); + }); + + describe('backtick strings', () => { + it('matches simple backtick strings', () => { + expect('`hello`'.match(backtickPattern)?.[0]).toBe('`hello`'); + }); + + it('matches strings with escaped backticks', () => { + expect('`use \\`code\\``'.match(backtickPattern)?.[0]).toBe('`use \\`code\\``'); + }); + + it('matches empty strings', () => { + expect('``'.match(backtickPattern)?.[0]).toBe('``'); + }); + }); +}); diff --git a/web/app/components/CodeViewer.tsx b/web/app/components/CodeViewer.tsx index a4b0dcfd..5f9cc4a3 100644 --- a/web/app/components/CodeViewer.tsx +++ b/web/app/components/CodeViewer.tsx @@ -88,12 +88,16 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) { const escapeHtml = (str: string) => str .replace(/&/g, '&') .replace(//g, '>'); + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); // Define token patterns with priorities (first match wins) // Order matters: strings and comments first to avoid highlighting inside them const tokenPatterns = [ - { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/, className: 'text-green-400' }, // strings + { regex: /"(?:[^"\\]|\\.)*"/, className: 'text-green-400' }, // double-quoted strings + { regex: /'(?:[^'\\]|\\.)*'/, className: 'text-green-400' }, // single-quoted strings + { regex: /`(?:[^`\\]|\\.)*`/, className: 'text-green-400' }, // backtick strings { regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments { regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments { regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments diff --git a/web/app/utils/formatters.test.ts b/web/app/utils/formatters.test.ts new file mode 100644 index 00000000..74a468e6 --- /dev/null +++ b/web/app/utils/formatters.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from 'vitest'; +import { escapeHtml, formatLargeText } from './formatters'; + +describe('escapeHtml', () => { + it('escapes ampersands', () => { + expect(escapeHtml('a & b')).toBe('a & b'); + }); + + it('escapes less than', () => { + expect(escapeHtml('a < b')).toBe('a < b'); + }); + + it('escapes greater than', () => { + expect(escapeHtml('a > b')).toBe('a > b'); + }); + + it('escapes double quotes', () => { + expect(escapeHtml('He said "hello"')).toBe('He said "hello"'); + }); + + it('escapes single quotes', () => { + expect(escapeHtml("It's fine")).toBe("It's fine"); + }); + + it('escapes all special characters together', () => { + expect(escapeHtml('')).toBe( + '<script>"alert('xss')&"</script>' + ); + }); +}); + +describe('formatLargeText', () => { + it('returns empty string for empty input', () => { + expect(formatLargeText('')).toBe(''); + }); + + it('wraps simple text in paragraph tags', () => { + expect(formatLargeText('Hello world')).toBe('

Hello world

'); + }); + + it('converts single newlines to br tags', () => { + expect(formatLargeText('Line1\nLine2')).toBe('

Line1
Line2

'); + }); + + it('converts double newlines to paragraph breaks with proper nesting', () => { + const result = formatLargeText('Line1\n\nLine2'); + expect(result).toBe('

Line1

Line2

'); + }); + + it('handles multiple paragraph breaks correctly', () => { + const result = formatLargeText('Para1\n\nPara2\n\nPara3'); + expect(result).toBe('

Para1

Para2

Para3

'); + }); + + it('escapes HTML in the input', () => { + const result = formatLargeText(''); + expect(result).toContain('<script>'); + expect(result).not.toContain('