Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions web/app/components/CodeViewer.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { describe, it, expect } from 'vitest';

// Test the escapeHtml and string regex patterns used in CodeViewer
// We test the logic directly since the component uses internal functions

describe('CodeViewer escapeHtml', () => {
  // Local replica of the escapeHtml helper that CodeViewer's highlightCode
  // defines internally. `&` must be replaced first (and with `&amp;`, not a
  // bare `&`) so the ampersands introduced by the later entity substitutions
  // are not escaped a second time.
  const escapeHtml = (str: string) =>
    str
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');

  it('escapes double quotes for attribute safety', () => {
    expect(escapeHtml('class="foo"')).toBe('class=&quot;foo&quot;');
  });

  it('escapes single quotes for attribute safety', () => {
    expect(escapeHtml("class='foo'")).toBe("class=&#039;foo&#039;");
  });

  it('escapes HTML tags', () => {
    expect(escapeHtml('<div>')).toBe('&lt;div&gt;');
  });

  it('escapes ampersands', () => {
    expect(escapeHtml('a && b')).toBe('a &amp;&amp; b');
  });
});

describe('CodeViewer string regex patterns', () => {
  // The three string-literal patterns under test, one per quote style.
  const doubleQuotePattern = /"(?:[^"\\]|\\.)*"/;
  const singleQuotePattern = /'(?:[^'\\]|\\.)*'/;
  const backtickPattern = /`(?:[^`\\]|\\.)*`/;

  // Asserts that the first match of `pattern` in `text` is `text` itself,
  // i.e. the pattern consumes the complete string literal.
  const expectWholeMatch = (pattern: RegExp, text: string): void => {
    const found = text.match(pattern)?.[0];
    expect(found).toBe(text);
  };

  describe('double-quoted strings', () => {
    it('matches simple double-quoted strings', () => {
      expectWholeMatch(doubleQuotePattern, '"hello"');
    });

    it('matches strings with escaped quotes', () => {
      expectWholeMatch(doubleQuotePattern, '"He said \\"hello\\""');
    });

    it('matches strings with escaped backslashes', () => {
      expectWholeMatch(doubleQuotePattern, '"path\\\\to\\\\file"');
    });

    it('matches empty strings', () => {
      expectWholeMatch(doubleQuotePattern, '""');
    });
  });

  describe('single-quoted strings', () => {
    it('matches simple single-quoted strings', () => {
      expectWholeMatch(singleQuotePattern, "'hello'");
    });

    it('matches strings with escaped quotes', () => {
      expectWholeMatch(singleQuotePattern, "'It\\'s fine'");
    });

    it('matches empty strings', () => {
      expectWholeMatch(singleQuotePattern, "''");
    });
  });

  describe('backtick strings', () => {
    it('matches simple backtick strings', () => {
      expectWholeMatch(backtickPattern, '`hello`');
    });

    it('matches strings with escaped backticks', () => {
      expectWholeMatch(backtickPattern, '`use \\`code\\``');
    });

    it('matches empty strings', () => {
      expectWholeMatch(backtickPattern, '``');
    });
  });
});
83 changes: 56 additions & 27 deletions web/app/components/CodeViewer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -82,39 +82,68 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) {

const detectedLanguage = language || getLanguageFromFileName(fileName);

// Basic syntax highlighting for common tokens
// Single-pass syntax highlighting to avoid corrupting HTML class attributes
const highlightCode = (code: string): string => {
// Escape HTML
let highlighted = code
// Escape HTML helper
const escapeHtml = (str: string) => str
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;');

// Common patterns for many languages
const patterns = [
// Strings
{ regex: /(["'`])(?:(?=(\\?))\2.)*?\1/g, class: 'text-green-400' },
// Comments
{ regex: /(\/\/.*$)/gm, class: 'text-gray-500 italic' },
{ regex: /(\/\*[\s\S]*?\*\/)/g, class: 'text-gray-500 italic' },
{ regex: /(#.*$)/gm, class: 'text-gray-500 italic' },
// Numbers
{ regex: /\b(\d+\.?\d*)\b/g, class: 'text-purple-400' },
// Keywords (common across many languages)
{ regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default)\b/g, class: 'text-blue-400' },
// Boolean and null values
{ regex: /\b(true|false|null|undefined|nil|None|True|False)\b/g, class: 'text-orange-400' },
// Function calls (basic)
{ regex: /(\w+)(?=\s*\()/g, class: 'text-yellow-400' },
// Types/Classes (PascalCase)
{ regex: /\b([A-Z][a-zA-Z0-9]*)\b/g, class: 'text-cyan-400' },
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#039;');

// Define token patterns with priorities (first match wins)
// Order matters: strings and comments first to avoid highlighting inside them
const tokenPatterns = [
{ regex: /"(?:[^"\\]|\\.)*"/, className: 'text-green-400' }, // double-quoted strings
{ regex: /'(?:[^'\\]|\\.)*'/, className: 'text-green-400' }, // single-quoted strings
{ regex: /`(?:[^`\\]|\\.)*`/, className: 'text-green-400' }, // backtick strings
{ regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments
{ regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments
{ regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments
// Alternations inside a pattern must use non-capturing groups: the combined regex wraps every pattern in exactly one capture group and the lookup maps group i + 1 back to tokenPatterns[i], so a nested capture group would shift the index of every later pattern.
{ regex: /\b(?:function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default|fn|pub|mod|use|mut|match|loop|impl|trait|where|type|readonly|override)\b/, className: 'text-blue-400' }, // keywords
{ regex: /\b(?:true|false|null|undefined|nil|None|True|False|NULL)\b/, className: 'text-orange-400' }, // literals
{ regex: /\b\d+\.?\d*\b/, className: 'text-purple-400' }, // numbers
Copy link

Copilot AI Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The number regex pattern /\b\d+\.?\d*\b/ will match numbers like "123." (trailing decimal point with no digits after). This is technically valid in some contexts but may not be intended.

Consider using /\b\d+(?:\.\d+)?\b/ which requires at least one digit after the decimal point, or /\b\d+\.?\d+\b|\b\d+\b/ to match either integers or proper decimals.

Examples of current behavior:

  • "123." → matches (may be unintended)
  • "123.456" → matches ✓
  • "123" → matches ✓
  • ".456" → no match ✓
Suggested change
{ regex: /\b\d+\.?\d*\b/, className: 'text-purple-400' }, // numbers
{ regex: /\b\d+(?:\.\d+)?\b/, className: 'text-purple-400' }, // numbers

Copilot uses AI. Check for mistakes.
{ regex: /\b[A-Z][a-zA-Z0-9]*\b/, className: 'text-cyan-400' }, // PascalCase (types/classes)
];

patterns.forEach(({ regex, class: className }) => {
highlighted = highlighted.replace(regex, `<span class="${className}">$&</span>`);
});
// Build a combined regex that matches any token
const combinedPattern = new RegExp(
tokenPatterns.map(p => `(${p.regex.source})`).join('|'),
'gm'
);

let result = '';
let lastIndex = 0;

// Single pass through the string
for (const match of code.matchAll(combinedPattern)) {
// Add non-matched text before this match (escaped)
if (match.index! > lastIndex) {
result += escapeHtml(code.slice(lastIndex, match.index));
Comment on lines +122 to +123
Copy link

Copilot AI Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The non-null assertion operator ! is used on match.index which is guaranteed to be defined for successful matches from matchAll(). However, TypeScript's type definitions may not reflect this guarantee.

While this is technically safe, consider using nullish coalescing for better defensive programming:

if ((match.index ?? 0) > lastIndex) {
  result += escapeHtml(code.slice(lastIndex, match.index ?? 0));
}

Or assert once at the start of the loop:

const index = match.index!;
if (index > lastIndex) {
  result += escapeHtml(code.slice(lastIndex, index));
}

Copilot uses AI. Check for mistakes.
}

// Find which pattern matched (first non-undefined capture group)
const matchedText = match[0];
let className = '';
for (let i = 0; i < tokenPatterns.length; i++) {
if (match[i + 1] !== undefined) {
className = tokenPatterns[i].className;
break;
}
}
Comment on lines +129 to +134
Copy link

Copilot AI Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The combined regex pattern creates a large number of capture groups (8 patterns = 8 capture groups). For every match, all capture groups are checked in the loop (lines 125-130) even though only one will ever be defined.

Consider optimizing by using named capture groups or storing pattern indices to avoid the linear search:

const tokenPatterns = [
  { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/, className: 'text-green-400', name: 'string' },
  // ... other patterns
];

// Then use a Map for O(1) lookup
const patternMap = new Map(tokenPatterns.map((p, i) => [i, p]));

Or simply store the result of the loop since you're already iterating through patterns when finding which matched.

Copilot uses AI. Check for mistakes.

// Add the highlighted token (escape the matched text too)
result += `<span class="${className}">${escapeHtml(matchedText)}</span>`;
lastIndex = match.index! + matchedText.length;
}

// Add remaining text after last match
if (lastIndex < code.length) {
result += escapeHtml(code.slice(lastIndex));
}

return highlighted;
return result;
};

const handleCopy = async () => {
Expand Down
76 changes: 76 additions & 0 deletions web/app/utils/formatters.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { describe, it, expect } from 'vitest';
import { escapeHtml, formatLargeText } from './formatters';

describe('escapeHtml', () => {
  // Each row: test title, raw input, expected escaped output.
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['escapes ampersands', 'a & b', 'a &amp; b'],
    ['escapes less than', 'a < b', 'a &lt; b'],
    ['escapes greater than', 'a > b', 'a &gt; b'],
    ['escapes double quotes', 'He said "hello"', 'He said &quot;hello&quot;'],
    ['escapes single quotes', "It's fine", "It&#039;s fine"],
    [
      'escapes all special characters together',
      '<script>"alert(\'xss\')&"</script>',
      '&lt;script&gt;&quot;alert(&#039;xss&#039;)&amp;&quot;&lt;/script&gt;',
    ],
  ];

  // Register one test per row, in the order listed above.
  for (const [title, raw, expected] of cases) {
    it(title, () => {
      expect(escapeHtml(raw)).toBe(expected);
    });
  }
});

describe('formatLargeText', () => {
  // Inputs whose complete rendered output is pinned exactly.
  // Each row: test title, raw input, expected HTML.
  const exactCases: ReadonlyArray<readonly [string, string, string]> = [
    ['returns empty string for empty input', '', ''],
    ['wraps simple text in paragraph tags', 'Hello world', '<p>Hello world</p>'],
    ['converts single newlines to br tags', 'Line1\nLine2', '<p>Line1<br>Line2</p>'],
    [
      'converts double newlines to paragraph breaks with proper nesting',
      'Line1\n\nLine2',
      '<p>Line1</p><p class="mt-3">Line2</p>',
    ],
    [
      'handles multiple paragraph breaks correctly',
      'Para1\n\nPara2\n\nPara3',
      '<p>Para1</p><p class="mt-3">Para2</p><p class="mt-3">Para3</p>',
    ],
  ];

  exactCases.forEach(([title, input, expected]) => {
    it(title, () => {
      expect(formatLargeText(input)).toBe(expected);
    });
  });

  // The remaining cases only check for fragments of the output.
  it('escapes HTML in the input', () => {
    const html = formatLargeText('<script>alert("xss")</script>');
    expect(html).toContain('&lt;script&gt;');
    expect(html).not.toContain('<script>');
  });

  it('formats inline code with backticks', () => {
    const html = formatLargeText('Use `code` here');
    expect(html).toContain('<code');
    expect(html).toContain('>code</code>');
  });

  it('formats bold text', () => {
    const html = formatLargeText('This is **bold** text');
    expect(html).toContain('<strong>bold</strong>');
  });

  it('formats italic text', () => {
    const html = formatLargeText('This is *italic* text');
    expect(html).toContain('<em>italic</em>');
  });
});
55 changes: 22 additions & 33 deletions web/app/utils/formatters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,49 +37,38 @@ export function formatJSON(obj: any, maxLength: number = 1000): string {
* Escapes HTML characters to prevent XSS
*/
export function escapeHtml(text: string): string {
  // Pure string substitution: no `document` access, so this works without a
  // DOM, and unlike a textContent/innerHTML round-trip it also escapes both
  // quote characters, which keeps the result safe inside attribute values.
  // `&` is replaced first so the entities produced by the later steps are not
  // escaped a second time.
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');
}

/**
 * Formats large text as HTML with line breaks and basic markdown.
 *
 * The input is HTML-escaped first; paragraph breaks, line breaks, inline code,
 * bold and italic markup are then applied to the escaped text, and the result
 * is wrapped in paragraph tags.
 *
 * @param text - Raw text to format.
 * @returns The formatted HTML string, or an empty string when `text` is empty.
 */
export function formatLargeText(text: string): string {
  if (!text) return '';

  // Escape first so that the only live markup is what gets inserted below.
  const escaped = escapeHtml(text);

  // Simple, safe formatting - just handle line breaks and basic markdown.
  const formatted = escaped
    // A blank line closes the current paragraph and opens the next one; this
    // must run before the single-newline rule so the pair is not turned into
    // two <br> tags.
    .replace(/\n\n/g, '</p><p class="mt-3">')
    // Any remaining single line break becomes a <br> tag.
    .replace(/\n/g, '<br>')
    // Inline code (backticks).
    .replace(/`([^`]+)`/g, '<code class="bg-gray-100 text-gray-800 px-1.5 py-0.5 rounded text-sm font-mono">$1</code>')
    // Bold runs before italic so `**text**` is not consumed by the
    // single-asterisk rule.
    .replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>')
    .replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // The outer tags pair up with the paragraph breaks inserted above.
  return `<p>${formatted}</p>`;
}

/**
Expand Down
Loading