Skip to content

Commit a797a54

Browse files
root and cursoragent committed
fix(sanitize): preserve angle brackets inside markdown code
Protect angle brackets in fenced and inline code before bluemonday HTML sanitization so generic type syntax like mut_raw_ptr<int> is not stripped from issue and PR bodies returned by MCP tools.

Closes #2202

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 457f599 commit a797a54

3 files changed

Lines changed: 263 additions & 2 deletions

File tree

pkg/github/issues_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,45 @@ func Test_GetIssue(t *testing.T) {
276276
}
277277
}
278278

279+
func Test_GetIssue_PreservesAngleBracketsInCodeBlocks(t *testing.T) {
280+
body := "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```"
281+
mockIssue := &github.Issue{
282+
Number: github.Ptr(42),
283+
Title: github.Ptr("Angle brackets in code"),
284+
Body: github.Ptr(body),
285+
State: github.Ptr("open"),
286+
HTMLURL: github.Ptr("https://github.com/owner/repo/issues/42"),
287+
User: &github.User{Login: github.Ptr("testuser")},
288+
}
289+
290+
serverTool := IssueRead(translations.NullTranslationHelper)
291+
client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
292+
GetReposIssuesByOwnerByRepoByIssueNumber: mockResponse(t, http.StatusOK, mockIssue),
293+
}))
294+
deps := BaseDeps{
295+
Client: client,
296+
GQLClient: defaultGQLClient,
297+
}
298+
handler := serverTool.Handler(deps)
299+
300+
request := createMCPRequest(map[string]any{
301+
"method": "get",
302+
"owner": "owner",
303+
"repo": "repo",
304+
"issue_number": float64(42),
305+
})
306+
result, err := handler(ContextWithDeps(context.Background(), deps), &request)
307+
require.NoError(t, err)
308+
require.NotNil(t, result)
309+
require.False(t, result.IsError)
310+
311+
textContent := getTextResult(t, result)
312+
var returnedIssue MinimalIssue
313+
err = json.Unmarshal([]byte(textContent.Text), &returnedIssue)
314+
require.NoError(t, err)
315+
assert.Equal(t, body, returnedIssue.Body)
316+
}
317+
279318
func Test_IssueRead_IFC_InsidersMode(t *testing.T) {
280319
t.Parallel()
281320

pkg/sanitize/sanitize.go

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ var policy *bluemonday.Policy
1212
var policyOnce sync.Once
1313

1414
func Sanitize(input string) string {
15-
return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
15+
cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
16+
protected := protectCodeAngleBrackets(cleaned)
17+
sanitized := FilterHTMLTags(protected)
18+
return restoreCodeAngleBrackets(sanitized)
1619
}
1720

1821
// FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -145,6 +148,141 @@ func isSafeCodeFenceToken(token string) bool {
145148
return true
146149
}
147150

151+
// Sentinels that stand in for angle brackets inside code while HTML
// sanitization runs. They are delimited by NUL bytes, which
// FilterInvisibleCharacters strips before protectCodeAngleBrackets runs, so
// caller-supplied input cannot contain a sentinel (sentinel collision attack).
const (
	ltSentinel = "\x00LT\x00"
	gtSentinel = "\x00GT\x00"
)
158+
159+
// protectCodeAngleBrackets replaces < and > inside fenced and inline code with
160+
// sentinels so bluemonday does not strip them as HTML tags.
161+
func protectCodeAngleBrackets(input string) string {
162+
if input == "" {
163+
return input
164+
}
165+
166+
lines := strings.Split(input, "\n")
167+
insideFence := false
168+
currentFenceLen := 0
169+
170+
for i, line := range lines {
171+
if toggled, fenceLen := toggleCodeFence(line, insideFence, currentFenceLen); toggled {
172+
insideFence = !insideFence
173+
if insideFence {
174+
currentFenceLen = fenceLen
175+
} else {
176+
currentFenceLen = 0
177+
}
178+
continue
179+
}
180+
181+
if insideFence {
182+
lines[i] = replaceAngleBrackets(line)
183+
continue
184+
}
185+
lines[i] = protectInlineCodeAngleBrackets(line)
186+
}
187+
188+
return strings.Join(lines, "\n")
189+
}
190+
191+
func toggleCodeFence(line string, insideFence bool, currentFenceLen int) (bool, int) {
192+
idx := strings.Index(line, "```")
193+
if idx == -1 || hasNonWhitespace(line[:idx]) {
194+
return false, currentFenceLen
195+
}
196+
197+
fenceEnd := idx
198+
for fenceEnd < len(line) && line[fenceEnd] == '`' {
199+
fenceEnd++
200+
}
201+
202+
fenceLen := fenceEnd - idx
203+
if fenceLen < 3 {
204+
return false, currentFenceLen
205+
}
206+
207+
if insideFence {
208+
if currentFenceLen != 0 && fenceLen < currentFenceLen {
209+
return false, currentFenceLen
210+
}
211+
return true, fenceLen
212+
}
213+
214+
return true, fenceLen
215+
}
216+
217+
func protectInlineCodeAngleBrackets(line string) string {
218+
if !strings.Contains(line, "`") {
219+
return line
220+
}
221+
222+
var out strings.Builder
223+
out.Grow(len(line))
224+
i := 0
225+
for i < len(line) {
226+
if line[i] != '`' {
227+
out.WriteByte(line[i])
228+
i++
229+
continue
230+
}
231+
232+
openStart := i
233+
openLen := 0
234+
for i < len(line) && line[i] == '`' {
235+
openLen++
236+
i++
237+
}
238+
239+
contentStart := i
240+
closeIdx := findInlineCodeClose(line, contentStart, openLen)
241+
if closeIdx == -1 {
242+
out.WriteString(line[openStart:i])
243+
continue
244+
}
245+
246+
out.WriteString(line[openStart:contentStart])
247+
out.WriteString(replaceAngleBrackets(line[contentStart:closeIdx]))
248+
out.WriteString(line[closeIdx : closeIdx+openLen])
249+
i = closeIdx + openLen
250+
}
251+
252+
return out.String()
253+
}
254+
255+
// findInlineCodeClose returns the index in line of the backtick run that
// closes an inline code span opened by openLen backticks, scanning from
// contentStart. It returns -1 when no such run exists.
//
// Only a maximal run of exactly openLen backticks counts as a closer. Longer
// or shorter runs are part of the span's content and are skipped whole, so
// the tail of a longer run is never mistaken for a closer.
func findInlineCodeClose(line string, contentStart, openLen int) int {
	for i := contentStart; i < len(line); {
		if line[i] != '`' {
			i++
			continue
		}

		runEnd := i
		for runEnd < len(line) && line[runEnd] == '`' {
			runEnd++
		}
		if runEnd-i == openLen {
			return i
		}
		// Length mismatch: jump past the entire run.
		i = runEnd
	}

	return -1
}
272+
273+
func replaceAngleBrackets(s string) string {
274+
if !strings.ContainsAny(s, "<>") {
275+
return s
276+
}
277+
s = strings.ReplaceAll(s, "<", ltSentinel)
278+
return strings.ReplaceAll(s, ">", gtSentinel)
279+
}
280+
281+
func restoreCodeAngleBrackets(input string) string {
282+
s := strings.ReplaceAll(input, ltSentinel, "<")
283+
return strings.ReplaceAll(s, gtSentinel, ">")
284+
}
285+
148286
func getPolicy() *bluemonday.Policy {
149287
policyOnce.Do(func() {
150288
p := bluemonday.StrictPolicy()
@@ -175,7 +313,8 @@ func getPolicy() *bluemonday.Policy {
175313

176314
func shouldRemoveRune(r rune) bool {
177315
switch r {
178-
case 0x200B, // ZERO WIDTH SPACE
316+
case 0x0000, // NUL — stripped to prevent sentinel collision in protectCodeAngleBrackets
317+
0x200B, // ZERO WIDTH SPACE
179318
0x200C, // ZERO WIDTH NON-JOINER
180319
0x200E, // LEFT-TO-RIGHT MARK
181320
0x200F, // RIGHT-TO-LEFT MARK

pkg/sanitize/sanitize_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ func TestShouldRemoveRune(t *testing.T) {
129129
expected bool
130130
}{
131131
// Individual characters that should be removed
132+
{name: "NUL byte", rune: 0x0000, expected: true},
132133
{name: "zero width space", rune: 0x200B, expected: true},
133134
{name: "zero width non-joiner", rune: 0x200C, expected: true},
134135
{name: "left-to-right mark", rune: 0x200E, expected: true},
@@ -300,3 +301,85 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
300301
result := Sanitize(input)
301302
assert.Equal(t, expected, result)
302303
}
304+
305+
func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
306+
tests := []struct {
307+
name string
308+
input string
309+
expected string
310+
}{
311+
{
312+
name: "fenced code block with angle brackets",
313+
input: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
314+
expected: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
315+
},
316+
{
317+
name: "inline code with angle brackets",
318+
input: "Use `Vec<String>` for collections.",
319+
expected: "Use `Vec<String>` for collections.",
320+
},
321+
{
322+
name: "angle brackets outside code are sanitized",
323+
input: "This has <script>alert('xss')</script> in it.",
324+
expected: "This has in it.",
325+
},
326+
{
327+
name: "fenced code block with generic types",
328+
input: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
329+
expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
330+
},
331+
{
332+
name: "multiple inline code spans with angle brackets",
333+
input: "Compare `Map<K, V>` and `Set<T>`.",
334+
expected: "Compare `Map<K, V>` and `Set<T>`.",
335+
},
336+
{
337+
name: "shorter fence inside code does not close block",
338+
input: "````\nline<A>\n```\nstill<B>\n````",
339+
expected: "````\nline<A>\n```\nstill<B>\n````",
340+
},
341+
{
342+
name: "sentinel collision does not bypass sanitizer",
343+
input: "\x00LT\x00script\x00GT\x00alert(1)\x00LT\x00/script\x00GT\x00",
344+
expected: "LTscriptGTalert(1)LT/scriptGT",
345+
},
346+
}
347+
348+
for _, tt := range tests {
349+
t.Run(tt.name, func(t *testing.T) {
350+
result := Sanitize(tt.input)
351+
assert.Equal(t, tt.expected, result)
352+
})
353+
}
354+
}
355+
356+
func TestProtectCodeAngleBrackets(t *testing.T) {
357+
tests := []struct {
358+
name string
359+
input string
360+
expected string
361+
}{
362+
{
363+
name: "fenced code block with angle brackets",
364+
input: "```\nvector<int> v;\n```",
365+
expected: "```\nvector" + ltSentinel + "int" + gtSentinel + " v;\n```",
366+
},
367+
{
368+
name: "inline code with angle brackets",
369+
input: "Use `Map<K, V>` here.",
370+
expected: "Use `Map" + ltSentinel + "K, V" + gtSentinel + "` here.",
371+
},
372+
{
373+
name: "angle brackets outside code unchanged",
374+
input: "Use <b>bold</b>\n```\ncode<T>\n```",
375+
expected: "Use <b>bold</b>\n```\ncode" + ltSentinel + "T" + gtSentinel + "\n```",
376+
},
377+
}
378+
379+
for _, tt := range tests {
380+
t.Run(tt.name, func(t *testing.T) {
381+
result := protectCodeAngleBrackets(tt.input)
382+
assert.Equal(t, tt.expected, result)
383+
})
384+
}
385+
}

0 commit comments

Comments
 (0)