-
Notifications
You must be signed in to change notification settings - Fork 54
Fix code viewer syntax highlighting corruption #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| import { describe, it, expect } from 'vitest'; | ||
|
|
||
| // Test the escapeHtml and string regex patterns used in CodeViewer | ||
| // We test the logic directly since the component uses internal functions | ||
|
|
||
| describe('CodeViewer escapeHtml', () => { | ||
| // Replicate the escapeHtml function from CodeViewer | ||
| const escapeHtml = (str: string) => str | ||
| .replace(/&/g, '&') | ||
| .replace(/</g, '<') | ||
| .replace(/>/g, '>') | ||
| .replace(/"/g, '"') | ||
| .replace(/'/g, '''); | ||
|
|
||
| it('escapes double quotes for attribute safety', () => { | ||
| expect(escapeHtml('class="foo"')).toBe('class="foo"'); | ||
| }); | ||
|
|
||
| it('escapes single quotes for attribute safety', () => { | ||
| expect(escapeHtml("class='foo'")).toBe("class='foo'"); | ||
| }); | ||
|
|
||
| it('escapes HTML tags', () => { | ||
| expect(escapeHtml('<div>')).toBe('<div>'); | ||
| }); | ||
|
|
||
| it('escapes ampersands', () => { | ||
| expect(escapeHtml('a && b')).toBe('a && b'); | ||
| }); | ||
| }); | ||
|
|
||
| describe('CodeViewer string regex patterns', () => { | ||
| // Test the improved string patterns | ||
| const doubleQuotePattern = /"(?:[^"\\]|\\.)*"/; | ||
| const singleQuotePattern = /'(?:[^'\\]|\\.)*'/; | ||
| const backtickPattern = /`(?:[^`\\]|\\.)*`/; | ||
|
|
||
| describe('double-quoted strings', () => { | ||
| it('matches simple double-quoted strings', () => { | ||
| expect('"hello"'.match(doubleQuotePattern)?.[0]).toBe('"hello"'); | ||
| }); | ||
|
|
||
| it('matches strings with escaped quotes', () => { | ||
| expect('"He said \\"hello\\""'.match(doubleQuotePattern)?.[0]).toBe('"He said \\"hello\\""'); | ||
| }); | ||
|
|
||
| it('matches strings with escaped backslashes', () => { | ||
| expect('"path\\\\to\\\\file"'.match(doubleQuotePattern)?.[0]).toBe('"path\\\\to\\\\file"'); | ||
| }); | ||
|
|
||
| it('matches empty strings', () => { | ||
| expect('""'.match(doubleQuotePattern)?.[0]).toBe('""'); | ||
| }); | ||
| }); | ||
|
|
||
| describe('single-quoted strings', () => { | ||
| it('matches simple single-quoted strings', () => { | ||
| expect("'hello'".match(singleQuotePattern)?.[0]).toBe("'hello'"); | ||
| }); | ||
|
|
||
| it('matches strings with escaped quotes', () => { | ||
| expect("'It\\'s fine'".match(singleQuotePattern)?.[0]).toBe("'It\\'s fine'"); | ||
| }); | ||
|
|
||
| it('matches empty strings', () => { | ||
| expect("''".match(singleQuotePattern)?.[0]).toBe("''"); | ||
| }); | ||
| }); | ||
|
|
||
| describe('backtick strings', () => { | ||
| it('matches simple backtick strings', () => { | ||
| expect('`hello`'.match(backtickPattern)?.[0]).toBe('`hello`'); | ||
| }); | ||
|
|
||
| it('matches strings with escaped backticks', () => { | ||
| expect('`use \\`code\\``'.match(backtickPattern)?.[0]).toBe('`use \\`code\\``'); | ||
| }); | ||
|
|
||
| it('matches empty strings', () => { | ||
| expect('``'.match(backtickPattern)?.[0]).toBe('``'); | ||
| }); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,39 +82,68 @@ export function CodeViewer({ code, fileName, language }: CodeViewerProps) { | |
|
|
||
| const detectedLanguage = language || getLanguageFromFileName(fileName); | ||
|
|
||
| // Basic syntax highlighting for common tokens | ||
| // Single-pass syntax highlighting to avoid corrupting HTML class attributes | ||
| const highlightCode = (code: string): string => { | ||
| // Escape HTML | ||
| let highlighted = code | ||
| // Escape HTML helper | ||
| const escapeHtml = (str: string) => str | ||
| .replace(/&/g, '&') | ||
| .replace(/</g, '<') | ||
| .replace(/>/g, '>'); | ||
|
|
||
| // Common patterns for many languages | ||
| const patterns = [ | ||
| // Strings | ||
| { regex: /(["'`])(?:(?=(\\?))\2.)*?\1/g, class: 'text-green-400' }, | ||
| // Comments | ||
| { regex: /(\/\/.*$)/gm, class: 'text-gray-500 italic' }, | ||
| { regex: /(\/\*[\s\S]*?\*\/)/g, class: 'text-gray-500 italic' }, | ||
| { regex: /(#.*$)/gm, class: 'text-gray-500 italic' }, | ||
| // Numbers | ||
| { regex: /\b(\d+\.?\d*)\b/g, class: 'text-purple-400' }, | ||
| // Keywords (common across many languages) | ||
| { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default)\b/g, class: 'text-blue-400' }, | ||
| // Boolean and null values | ||
| { regex: /\b(true|false|null|undefined|nil|None|True|False)\b/g, class: 'text-orange-400' }, | ||
| // Function calls (basic) | ||
| { regex: /(\w+)(?=\s*\()/g, class: 'text-yellow-400' }, | ||
| // Types/Classes (PascalCase) | ||
| { regex: /\b([A-Z][a-zA-Z0-9]*)\b/g, class: 'text-cyan-400' }, | ||
| .replace(/>/g, '>') | ||
| .replace(/"/g, '"') | ||
| .replace(/'/g, '''); | ||
|
|
||
| // Define token patterns with priorities (first match wins) | ||
| // Order matters: strings and comments first to avoid highlighting inside them | ||
| const tokenPatterns = [ | ||
| { regex: /"(?:[^"\\]|\\.)*"/, className: 'text-green-400' }, // double-quoted strings | ||
| { regex: /'(?:[^'\\]|\\.)*'/, className: 'text-green-400' }, // single-quoted strings | ||
| { regex: /`(?:[^`\\]|\\.)*`/, className: 'text-green-400' }, // backtick strings | ||
| { regex: /\/\/.*$/, className: 'text-gray-500 italic' }, // single-line comments | ||
| { regex: /\/\*[\s\S]*?\*\//, className: 'text-gray-500 italic' }, // multi-line comments | ||
| { regex: /#.*$/, className: 'text-gray-500 italic' }, // hash comments | ||
| { regex: /\b(function|const|let|var|if|else|for|while|return|class|import|export|from|async|await|def|elif|except|finally|lambda|with|as|raise|del|global|nonlocal|assert|break|continue|try|catch|throw|new|this|super|extends|implements|interface|abstract|static|public|private|protected|void|int|string|boolean|float|double|char|long|short|byte|enum|struct|typedef|union|namespace|using|package|goto|switch|case|default|fn|pub|mod|use|mut|match|loop|impl|trait|where|type|readonly|override)\b/, className: 'text-blue-400' }, // keywords | ||
| { regex: /\b(true|false|null|undefined|nil|None|True|False|NULL)\b/, className: 'text-orange-400' }, // literals | ||
| { regex: /\b\d+\.?\d*\b/, className: 'text-purple-400' }, // numbers | ||
| { regex: /\b[A-Z][a-zA-Z0-9]*\b/, className: 'text-cyan-400' }, // PascalCase (types/classes) | ||
| ]; | ||
|
|
||
| patterns.forEach(({ regex, class: className }) => { | ||
| highlighted = highlighted.replace(regex, `<span class="${className}">$&</span>`); | ||
| }); | ||
| // Build a combined regex that matches any token | ||
| const combinedPattern = new RegExp( | ||
| tokenPatterns.map(p => `(${p.regex.source})`).join('|'), | ||
| 'gm' | ||
| ); | ||
|
|
||
| let result = ''; | ||
| let lastIndex = 0; | ||
|
|
||
| // Single pass through the string | ||
| for (const match of code.matchAll(combinedPattern)) { | ||
| // Add non-matched text before this match (escaped) | ||
| if (match.index! > lastIndex) { | ||
| result += escapeHtml(code.slice(lastIndex, match.index)); | ||
|
Comment on lines
+122
to
+123
|
||
| } | ||
|
|
||
| // Find which pattern matched (first non-undefined capture group) | ||
| const matchedText = match[0]; | ||
| let className = ''; | ||
| for (let i = 0; i < tokenPatterns.length; i++) { | ||
| if (match[i + 1] !== undefined) { | ||
| className = tokenPatterns[i].className; | ||
| break; | ||
| } | ||
| } | ||
|
Comment on lines
+129
to
+134
|
||
|
|
||
| // Add the highlighted token (escape the matched text too) | ||
| result += `<span class="${className}">${escapeHtml(matchedText)}</span>`; | ||
| lastIndex = match.index! + matchedText.length; | ||
| } | ||
|
|
||
| // Add remaining text after last match | ||
| if (lastIndex < code.length) { | ||
| result += escapeHtml(code.slice(lastIndex)); | ||
| } | ||
|
|
||
| return highlighted; | ||
| return result; | ||
| }; | ||
|
|
||
| const handleCopy = async () => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| import { describe, it, expect } from 'vitest'; | ||
| import { escapeHtml, formatLargeText } from './formatters'; | ||
|
|
||
| describe('escapeHtml', () => { | ||
| it('escapes ampersands', () => { | ||
| expect(escapeHtml('a & b')).toBe('a & b'); | ||
| }); | ||
|
|
||
| it('escapes less than', () => { | ||
| expect(escapeHtml('a < b')).toBe('a < b'); | ||
| }); | ||
|
|
||
| it('escapes greater than', () => { | ||
| expect(escapeHtml('a > b')).toBe('a > b'); | ||
| }); | ||
|
|
||
| it('escapes double quotes', () => { | ||
| expect(escapeHtml('He said "hello"')).toBe('He said "hello"'); | ||
| }); | ||
|
|
||
| it('escapes single quotes', () => { | ||
| expect(escapeHtml("It's fine")).toBe("It's fine"); | ||
| }); | ||
|
|
||
| it('escapes all special characters together', () => { | ||
| expect(escapeHtml('<script>"alert(\'xss\')&"</script>')).toBe( | ||
| '<script>"alert('xss')&"</script>' | ||
| ); | ||
| }); | ||
| }); | ||
|
|
||
| describe('formatLargeText', () => { | ||
| it('returns empty string for empty input', () => { | ||
| expect(formatLargeText('')).toBe(''); | ||
| }); | ||
|
|
||
| it('wraps simple text in paragraph tags', () => { | ||
| expect(formatLargeText('Hello world')).toBe('<p>Hello world</p>'); | ||
| }); | ||
|
|
||
| it('converts single newlines to br tags', () => { | ||
| expect(formatLargeText('Line1\nLine2')).toBe('<p>Line1<br>Line2</p>'); | ||
| }); | ||
|
|
||
| it('converts double newlines to paragraph breaks with proper nesting', () => { | ||
| const result = formatLargeText('Line1\n\nLine2'); | ||
| expect(result).toBe('<p>Line1</p><p class="mt-3">Line2</p>'); | ||
| }); | ||
|
|
||
| it('handles multiple paragraph breaks correctly', () => { | ||
| const result = formatLargeText('Para1\n\nPara2\n\nPara3'); | ||
| expect(result).toBe('<p>Para1</p><p class="mt-3">Para2</p><p class="mt-3">Para3</p>'); | ||
| }); | ||
|
|
||
| it('escapes HTML in the input', () => { | ||
| const result = formatLargeText('<script>alert("xss")</script>'); | ||
| expect(result).toContain('<script>'); | ||
| expect(result).not.toContain('<script>'); | ||
| }); | ||
|
|
||
| it('formats inline code with backticks', () => { | ||
| const result = formatLargeText('Use `code` here'); | ||
| expect(result).toContain('<code'); | ||
| expect(result).toContain('>code</code>'); | ||
| }); | ||
|
|
||
| it('formats bold text', () => { | ||
| const result = formatLargeText('This is **bold** text'); | ||
| expect(result).toContain('<strong>bold</strong>'); | ||
| }); | ||
|
|
||
| it('formats italic text', () => { | ||
| const result = formatLargeText('This is *italic* text'); | ||
| expect(result).toContain('<em>italic</em>'); | ||
| }); | ||
| }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The number regex pattern
/\b\d+\.?\d*\b/will match numbers like "123." (trailing decimal point with no digits after). This is technically valid in some contexts but may not be intended.Consider using
/\b\d+(?:\.\d+)?\b/which requires at least one digit after the decimal point, or/\b\d+\.?\d+\b|\b\d+\b/to match either integers or proper decimals.Examples of current behavior: