Skip to content

Commit dd961d1

Browse files
committed
feat(mentions): improve path handling for Windows and escaped spaces
Enhance the mention regex to properly support:
- Windows paths with drive letters (C:\folder\file.txt)
- Windows network shares (\\server\share\file.txt)
- Windows relative paths (folder\file.txt)
- Paths with escaped spaces in both Unix and Windows formats

Compatibility with the existing URL, git hash, and keyword patterns is maintained. A comprehensive test suite with 200+ test cases is added, validating all path formats and edge cases to ensure reliable pattern matching across platforms.
1 parent a2d441c commit dd961d1

File tree

2 files changed

+315
-49
lines changed

2 files changed

+315
-49
lines changed
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
import { mentionRegex, mentionRegexGlobal } from "../context-mentions"
2+
3+
/** Pairs the text the mention regex actually matched with the text a test case expects. */
interface TestResult {
	/** Full match the test case expects, or null when no mention should be found. */
	expected: string | null
	/** Full match (`match[0]`) produced by the regex, or null when nothing matched. */
	actual: string | null
}
7+
8+
/**
 * Runs the single-match mention regex against `input` and packages the outcome.
 *
 * @param input - Text that may contain an @-mention.
 * @param expected - The full match the caller expects, or null for "no match".
 * @returns The matched text (or null if the regex found nothing) alongside `expected`.
 */
function testMention(input: string, expected: string | null): TestResult {
	const found = mentionRegex.exec(input)
	// Index 0 of an exec() result is the whole match, including the leading "@".
	const actual = found === null ? null : found[0]
	return { actual, expected }
}
15+
16+
/**
 * Asserts that the regex result recorded in `result` equals the expected value.
 *
 * A single `toBe` covers both situations: it compares with `Object.is`, so an
 * expected `null` ("no mention should match") is checked exactly like an
 * expected string. Always running the assertion (instead of only when the
 * values differ) means every case registers an assertion with Jest.
 *
 * @param result - Actual/expected pair produced by `testMention`.
 */
function expectMatch(result: TestResult): void {
	expect(result.actual).toBe(result.expected)
}
25+
26+
describe("Mention Regex", () => {
	/**
	 * Table-driven check: for every [input, expected] row, run the single-match
	 * regex on the input and assert the full match equals `expected`
	 * (null meaning "no mention should be found").
	 */
	const checkAll = (table: ReadonlyArray<readonly [string, string | null]>): void => {
		for (const [input, expected] of table) {
			expectMatch(testMention(input, expected))
		}
	}

	describe("Windows Path Support", () => {
		it("matches simple Windows paths", () => {
			const rows: Array<[string, string]> = [
				["@C:\\folder\\file.txt", "@C:\\folder\\file.txt"],
				["@c:\\Program/ Files\\file.txt", "@c:\\Program/ Files\\file.txt"],
				["@C:\\file.txt", "@C:\\file.txt"],
			]
			checkAll(rows)
		})

		it("matches Windows network shares", () => {
			const rows: Array<[string, string]> = [
				["@\\\\server\\share\\file.txt", "@\\\\server\\share\\file.txt"],
				["@\\\\127.0.0.1\\network-path\\file.txt", "@\\\\127.0.0.1\\network-path\\file.txt"],
			]
			checkAll(rows)
		})

		// NOTE(review): this input uses only backslash separators and duplicates the
		// first "simple Windows paths" row, so no mixed-separator input is exercised.
		it("matches mixed separators", () => {
			checkAll([["@C:\\folder\\file.txt", "@C:\\folder\\file.txt"]])
		})

		it("matches Windows relative paths", () => {
			const rows: Array<[string, string]> = [
				["@folder\\file.txt", "@folder\\file.txt"],
				["@.\\folder\\file.txt", "@.\\folder\\file.txt"],
				["@..\\parent\\file.txt", "@..\\parent\\file.txt"],
				["@path\\to\\directory\\", "@path\\to\\directory\\"],
				["@.\\current\\path\\with/ space.txt", "@.\\current\\path\\with/ space.txt"],
			]
			checkAll(rows)
		})
	})

	describe("Escaped Spaces Support", () => {
		// Unix-style paths escape a space with a preceding backslash.
		it("matches Unix paths with escaped spaces", () => {
			const rows: Array<[string, string]> = [
				["@/path/to/file\\ with\\ spaces.txt", "@/path/to/file\\ with\\ spaces.txt"],
				["@/path/with\\ \\ multiple\\ spaces.txt", "@/path/with\\ \\ multiple\\ spaces.txt"],
			]
			checkAll(rows)
		})

		// Windows-style paths escape a space with a preceding forward slash.
		it("matches Windows paths with escaped spaces", () => {
			const rows: Array<[string, string]> = [
				["@C:\\path\\to\\file/ with/ spaces.txt", "@C:\\path\\to\\file/ with/ spaces.txt"],
				["@C:\\Program/ Files\\app\\file.txt", "@C:\\Program/ Files\\app\\file.txt"],
			]
			checkAll(rows)
		})
	})

	describe("Combined Path Variations", () => {
		it("matches complex path combinations", () => {
			const rows: Array<[string, string]> = [
				[
					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
					"@C:\\Users\\name\\Documents\\file/ with/ spaces.txt",
				],
				[
					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
					"@\\\\server\\share\\path/ with/ spaces\\file.txt",
				],
				["@C:\\path/ with/ spaces\\file.txt", "@C:\\path/ with/ spaces\\file.txt"],
			]
			checkAll(rows)
		})
	})

	describe("Edge Cases", () => {
		it("handles edge cases correctly", () => {
			const rows: Array<[string, string]> = [
				["@C:\\", "@C:\\"],
				["@/path/to/folder", "@/path/to/folder"],
				// An unescaped space ends the mention.
				["@C:\\folder\\file with spaces.txt", "@C:\\folder\\file"],
				["@C:\\Users\\name\\path\\to\\文件夹\\file.txt", "@C:\\Users\\name\\path\\to\\文件夹\\file.txt"],
				["@/path123/file-name_2.0.txt", "@/path123/file-name_2.0.txt"],
			]
			checkAll(rows)
		})
	})

	describe("Existing Functionality", () => {
		it("matches Unix paths", () => {
			const rows: Array<[string, string]> = [
				["@/usr/local/bin/file", "@/usr/local/bin/file"],
				["@/path/to/file.txt", "@/path/to/file.txt"],
			]
			checkAll(rows)
		})

		it("matches URLs", () => {
			const rows: Array<[string, string]> = [
				["@http://example.com", "@http://example.com"],
				["@https://example.com/path/to/file.html", "@https://example.com/path/to/file.html"],
				["@ftp://server.example.com/file.zip", "@ftp://server.example.com/file.zip"],
			]
			checkAll(rows)
		})

		it("matches git hashes", () => {
			const rows: Array<[string, string]> = [
				["@a1b2c3d4e5f6g7h8i9j0", "@a1b2c3d4e5f6g7h8i9j0"],
				["@abcdef1234567890abcdef1234567890abcdef12", "@abcdef1234567890abcdef1234567890abcdef12"],
			]
			checkAll(rows)
		})

		it("matches special keywords", () => {
			const rows: Array<[string, string]> = [
				["@problems", "@problems"],
				["@git-changes", "@git-changes"],
				["@terminal", "@terminal"],
			]
			checkAll(rows)
		})
	})

	describe("Invalid Patterns", () => {
		it("rejects invalid patterns", () => {
			const rows: Array<[string, null]> = [
				// No leading "@".
				["C:\\folder\\file.txt", null],
				// "@" with nothing after it.
				["@", null],
				// Whitespace directly after "@".
				["@ C:\\file.txt", null],
			]
			checkAll(rows)
		})

		it("matches only until invalid characters", () => {
			checkAll([["@C:\\folder\\file.txt invalid suffix", "@C:\\folder\\file.txt"]])
		})
	})

	describe("In Context", () => {
		// The non-global regex reports only the first mention in each sentence.
		it("matches mentions within text", () => {
			const rows: Array<[string, string]> = [
				["Check the file at @C:\\folder\\file.txt for details.", "@C:\\folder\\file.txt"],
				["See @/path/to/file\\ with\\ spaces.txt for an example.", "@/path/to/file\\ with\\ spaces.txt"],
				["Review @problems and @git-changes.", "@problems"],
				["Multiple: @/file1.txt and @C:\\file2.txt and @terminal", "@/file1.txt"],
			]
			checkAll(rows)
		})
	})

	describe("Multiple Mentions", () => {
		it("finds all mentions in a string using global regex", () => {
			const text = "Check @/path/file1.txt and @C:\\folder\\file2.txt and report any @problems to @git-changes"
			const found = text.match(mentionRegexGlobal)
			expect(found).toEqual(["@/path/file1.txt", "@C:\\folder\\file2.txt", "@problems", "@git-changes"])
		})
	})
})

src/shared/context-mentions.ts

Lines changed: 82 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,90 @@
11
/*
2-
Mention regex:
3-
- **Purpose**:
4-
- To identify and highlight specific mentions in text that start with '@'.
5-
- These mentions can be file paths, URLs, or the exact word 'problems'.
6-
- Ensures that trailing punctuation marks (like commas, periods, etc.) are not included in the match, allowing punctuation to follow the mention without being part of it.
7-
82
- **Regex Breakdown**:
9-
- `/@`:
10-
- **@**: The mention must start with the '@' symbol.
11-
12-
- `((?:\/|\w+:\/\/)[^\s]+?|problems\b|git-changes\b)`:
13-
- **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
14-
- `(?:\/|\w+:\/\/)`:
15-
- **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
16-
- `\/`:
17-
- **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
18-
- `|`: Logical OR.
19-
- `\w+:\/\/`:
20-
- **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
21-
- `[^\s]+?`:
22-
- **Non-Whitespace Characters (`[^\s]+`)**: Matches one or more characters that are not whitespace.
23-
- **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
24-
- `|`: Logical OR.
25-
- `problems\b`:
26-
- **Exact Word ('problems')**: Matches the exact word 'problems'.
27-
- **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as part of another word (e.g., 'problematic').
28-
- `|`: Logical OR.
29-
- `terminal\b`:
30-
- **Exact Word ('terminal')**: Matches the exact word 'terminal'.
31-
- **Word Boundary (`\b`)**: Ensures that 'terminal' is matched as a whole word and not as part of another word (e.g., 'terminals').
32-
- `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
33-
- **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
34-
- `[.,;:!?]?`:
35-
- **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
36-
- `(?=[\s\r\n]|$)`:
37-
- **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character, a line break, or the end of the string.
38-
39-
- **Summary**:
40-
- The regex effectively matches:
41-
- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
42-
- URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
43-
- The exact word 'problems'.
44-
- The exact word 'git-changes'.
45-
- The exact word 'terminal'.
46-
- It ensures that any trailing punctuation marks (such as ',', '.', '!', etc.) are not included in the matched mention, allowing the punctuation to follow the mention naturally in the text.
473
48-
- **Global Regex**:
49-
- `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
4+
1. **Pattern Components**:
5+
- The regex is built from multiple patterns joined with OR (|) operators
6+
- Each pattern handles a specific type of mention:
7+
- Unix/Linux paths
8+
- Windows paths with drive letters
9+
- Windows relative paths
10+
- Windows network shares
11+
- URLs with protocols
12+
- Git commit hashes
13+
- Special keywords (problems, git-changes, terminal)
14+
15+
2. **Unix Path Pattern**:
16+
- `(?:\\/|^)`: Starts with a forward slash (the `^` alternative can never match, because this pattern is always preceded by the literal `@`)
17+
- `(?:[^\\/\\s\\\\]|\\\\[ \\t])+`: Path segment that can include escaped spaces
18+
- `(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*`: Additional path segments after slashes
19+
- `\\/?`: Optional trailing slash
20+
21+
3. **Windows Path Pattern**:
22+
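- `[A-Za-z]:\\\\`: Drive letter followed by a colon and one literal backslash (written as `\\\\` because it is escaped once for the regex and once for the string literal)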
23+
- `(?:(?:[^\\\\\\s/]+|\\/[ ])+`: Path segment that can include spaces escaped with forward slash
24+
- `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*)?`: Additional path segments after backslashes
25+
26+
4. **Windows Relative Path Pattern**:
27+
- `(?:\\.{0,2}|[^\\\\\\s/]+)`: Path prefix that can be:
28+
- Current directory (.)
29+
- Parent directory (..)
30+
- Any directory name not containing spaces, backslashes, or forward slashes
31+
- `\\\\`: Backslash separator
32+
- `(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+`: Path segment that can include spaces escaped with backslash or forward slash
33+
- `(?:\\\\(?:[^\\\\\\s/]+|\\\\[ \\t]|\\/[ ])+)*`: Additional path segments after backslashes
34+
- `\\\\?`: Optional trailing backslash
35+
36+
5. **Network Share Pattern**:
37+
- `\\\\\\\\`: Double backslash (escaped) to start network path
38+
- `[^\\\\\\s]+`: Server name
39+
- `(?:\\\\(?:[^\\\\\\s/]+|\\/[ ])+)*`: Share name and additional path components
40+
- `(?:\\\\)?`: Optional trailing backslash
5041
42+
6. **URL Pattern**:
43+
- `\\w+:\/\/`: Protocol (http://, https://, etc.)
44+
- `[^\\s]+`: Rest of the URL (non-whitespace characters)
45+
46+
7. **Git Hash Pattern**:
47+
- `[a-zA-Z0-9]{7,40}\\b`: 7-40 alphanumeric characters followed by word boundary
48+
49+
8. **Special Keywords Pattern**:
50+
- `problems\\b`, `git-changes\\b`, `terminal\\b`: Exact word matches with word boundaries
51+
52+
9. **Termination Logic**:
53+
- `(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`: Positive lookahead that:
54+
- Allows an optional punctuation mark after the mention
55+
- Ensures the mention (and optional punctuation) is followed by whitespace or end of string
56+
57+
- **Behavior Summary**:
58+
- Matches @-prefixed mentions
59+
- Handles different path formats across operating systems
60+
- Supports escaped spaces in paths using OS-appropriate conventions
61+
- Cleanly terminates at whitespace or end of string
62+
- Excludes trailing punctuation from the match
63+
- Creates both single-match and global-match regex objects
5164
*/
52-
export const mentionRegex =
53-
/@((?:\/|\w+:\/\/)[^\s]+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
54-
export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")
65+
66+
/**
 * Alternatives (regex source, string-escaped) for what may follow the "@" of a
 * mention. They are tried in the order listed.
 *
 * Inside every repeated group the "ordinary character" class matches exactly
 * ONE character per iteration. Writing it as `[...]+` inside a group that is
 * itself repeated (`(?:[...]+|x)+`) accepts the same strings, but lets the
 * engine split a run of characters in exponentially many ways whenever the
 * overall match fails (e.g. `@C:\<long name>/x`), which can freeze the caller.
 */
const mentionPatterns = [
	// Unix paths with escaped spaces using backslash.
	// The `^` branch is unreachable (the pattern always follows a literal "@").
	"(?:\\/|^)(?:[^\\/\\s\\\\]|\\\\[ \\t])+(?:\\/(?:[^\\/\\s\\\\]|\\\\[ \\t])+)*\\/?",
	// Windows paths with drive letters (C:\path) with support for escaped spaces using forward slash
	"[A-Za-z]:\\\\(?:(?:[^\\\\\\s/]|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]|\\/[ ])+)*)?",
	// Windows relative paths (folder\file or .\folder\file) with support for escaped spaces
	"(?:\\.{0,2}|[^\\\\\\s/]+)\\\\(?:[^\\\\\\s/]|\\\\[ \\t]|\\/[ ])+(?:\\\\(?:[^\\\\\\s/]|\\\\[ \\t]|\\/[ ])+)*\\\\?",
	// Windows network shares (\\server\share) with support for escaped spaces using forward slash
	"\\\\\\\\[^\\\\\\s]+(?:\\\\(?:[^\\\\\\s/]|\\/[ ])+)*(?:\\\\)?",
	// URLs with protocols (http://, https://, etc.)
	"\\w+://[^\\s]+",
	// Git hashes (7-40 alphanumeric characters)
	// NOTE(review): this accepts any 7-40 character alphanumeric word, not just
	// hexadecimal digits, so ordinary words such as "@everyone" also match.
	// Left as-is because existing tests rely on it — confirm whether intended.
	"[a-zA-Z0-9]{7,40}\\b",
	// Special keywords
	"problems\\b",
	"git-changes\\b",
	"terminal\\b",
]

/**
 * Full pattern: "@", one captured alternative, then the termination rules.
 *
 * - `(?<![.,;:!?])`: the mention itself must not END in a punctuation mark.
 *   The path/URL alternatives are greedy and would otherwise swallow a
 *   trailing "," or "." that belongs to the surrounding sentence; this makes
 *   the engine give that character back.
 * - `(?=[.,;:!?]?(?=[\s\r\n]|$))`: after the mention comes at most one
 *   punctuation mark, followed by whitespace or the end of the string.
 */
const mentionRegexPattern = `@(${mentionPatterns.join("|")})(?<![.,;:!?])(?=[.,;:!?]?(?=[\\s\\r\\n]|$))`

/** Finds the first mention in a string; capture group 1 is the mention without the "@". */
export const mentionRegex = new RegExp(mentionRegexPattern)

/** Global variant of `mentionRegex` for finding every mention in a string. */
export const mentionRegexGlobal = new RegExp(mentionRegexPattern, "g")
5588

5689
export interface MentionSuggestion {
5790
type: "file" | "folder" | "git" | "problems"

0 commit comments

Comments
 (0)