Skip to content

Commit cc77aab

Browse files
committed
test(core): add sanitizer spec; hoist regex patterns and type guard in taskMessages
1 parent f3e46fc commit cc77aab

File tree

2 files changed

+202
-6
lines changed

2 files changed

+202
-6
lines changed
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
/**
2+
* Tests for centralized UI message redaction in taskMessages.ts
3+
* Verifies:
4+
* - saveTaskMessages() sanitizes sensitive payloads before persistence
5+
* - readTaskMessages() sanitizes legacy payloads on read as a safety net
6+
* - Idempotency and non-string handling
7+
*/
8+
9+
import * as path from "path"
10+
11+
// Mocks
12+
let writtenPath: string | null = null
13+
let writtenData: any = null
14+
15+
vi.mock("../../../utils/safeWriteJson", () => {
16+
return {
17+
safeWriteJson: vi.fn(async (p: string, data: any) => {
18+
writtenPath = p
19+
writtenData = data
20+
}),
21+
}
22+
})
23+
24+
vi.mock("../../../utils/storage", () => {
25+
return {
26+
getTaskDirectoryPath: vi.fn(async (_globalStoragePath: string, _taskId: string) => "/tmp/taskdir"),
27+
}
28+
})
29+
30+
let fileExists = true
31+
vi.mock("../../../utils/fs", () => {
32+
return {
33+
fileExistsAtPath: vi.fn(async (_p: string) => fileExists),
34+
}
35+
})
36+
37+
// For read sanitization tests - simulate raw file contents
38+
let mockReadFilePayload: string = "[]"
39+
vi.mock("fs/promises", async (importOriginal) => {
40+
const actual = await importOriginal<any>()
41+
return {
42+
...actual,
43+
readFile: vi.fn(async (_p: string, _enc: string) => mockReadFilePayload),
44+
}
45+
})
46+
47+
// SUT
48+
import { readTaskMessages, saveTaskMessages } from "../taskMessages"
49+
import { GlobalFileNames } from "../../../shared/globalFileNames"
50+
51+
describe("taskMessages redaction", () => {
52+
beforeEach(() => {
53+
writtenPath = null
54+
writtenData = null
55+
fileExists = true
56+
mockReadFilePayload = "[]"
57+
})
58+
59+
it("saveTaskMessages() should sanitize sensitive tags and JSON 'request' envelope", async () => {
60+
const messages = [
61+
// JSON api_req_started envelope
62+
{
63+
ts: 1,
64+
type: "say",
65+
say: "api_req_started",
66+
text: JSON.stringify({
67+
request:
68+
"Header\n" +
69+
"<files>s1</files>\n" +
70+
"<file_content>topsecret</file_content>\n" +
71+
"<content type='text'>inner</content>\n" +
72+
"<file x='y'>body</file>",
73+
apiProtocol: "anthropic",
74+
}),
75+
},
76+
// Raw UI text with various tags
77+
{
78+
ts: 2,
79+
type: "say",
80+
say: "text",
81+
text:
82+
"pre " +
83+
"<files>multi</files> " +
84+
"<file id='1'>abc</file> " +
85+
"<content>blob</content> " +
86+
"<file_content>secretbytes</file_content> " +
87+
"post",
88+
},
89+
// Non-sensitive string should remain identical
90+
{ ts: 3, type: "say", say: "text", text: "no sensitive" },
91+
// Non-string text should be left untouched
92+
{ ts: 4, type: "say", say: "text", text: undefined },
93+
] as any[]
94+
95+
await saveTaskMessages({ messages, taskId: "t1", globalStoragePath: "/any" })
96+
97+
// Assert path used
98+
expect(writtenPath).toBe(path.join("/tmp/taskdir", GlobalFileNames.uiMessages))
99+
expect(Array.isArray(writtenData)).toBe(true)
100+
101+
const [m1, m2, m3, m4] = writtenData as any[]
102+
103+
// m1: JSON envelope should be sanitized inside request
104+
const m1Obj = JSON.parse(m1.text || "{}")
105+
expect(typeof m1Obj.request).toBe("string")
106+
expect(m1Obj.request).toContain("<file_content>[omitted]</file_content>")
107+
expect(m1Obj.request).toContain("<content>[omitted]</content>")
108+
expect(m1Obj.request).toContain("<file>[omitted]</file>")
109+
expect(m1Obj.request).toContain("<files>[omitted]</files>")
110+
// Original payloads should not remain
111+
expect(m1Obj.request).not.toContain("topsecret")
112+
expect(m1Obj.request).not.toContain("inner")
113+
expect(m1Obj.request).not.toContain("body")
114+
expect(m1Obj.request).not.toContain("multi")
115+
116+
// m2: raw text with tags should be scrubbed
117+
expect(m2.text).toContain("<file_content>[omitted]</file_content>")
118+
expect(m2.text).toContain("<content>[omitted]</content>")
119+
expect(m2.text).toContain("<file>[omitted]</file>")
120+
expect(m2.text).toContain("<files>[omitted]</files>")
121+
expect(m2.text).not.toContain("secretbytes")
122+
expect(m2.text).not.toContain("blob")
123+
expect(m2.text).not.toContain("abc")
124+
expect(m2.text).not.toContain("multi")
125+
126+
// m3: unchanged safe content
127+
expect(m3.text).toBe("no sensitive")
128+
129+
// m4: undefined remains undefined
130+
expect(m4.text).toBeUndefined()
131+
})
132+
133+
it("readTaskMessages() should sanitize legacy on read", async () => {
134+
const legacy = [
135+
{
136+
ts: 10,
137+
type: "say",
138+
say: "api_req_started",
139+
text: JSON.stringify({
140+
request: "X <file_content>L3gacy</file_content> Y",
141+
apiProtocol: "anthropic",
142+
}),
143+
},
144+
{
145+
ts: 11,
146+
type: "say",
147+
say: "text",
148+
text: "pre <files>bundle</files> post",
149+
},
150+
]
151+
mockReadFilePayload = JSON.stringify(legacy)
152+
153+
const result = await readTaskMessages({ taskId: "t2", globalStoragePath: "/any" })
154+
155+
expect(result.length).toBe(2)
156+
const [r1, r2] = result as any[]
157+
158+
const r1Obj = JSON.parse(r1.text || "{}")
159+
expect(r1Obj.request).toContain("<file_content>[omitted]</file_content>")
160+
expect(r1Obj.request).not.toContain("L3gacy")
161+
162+
expect(r2.text).toContain("<files>[omitted]</files>")
163+
expect(r2.text).not.toContain("bundle")
164+
})
165+
166+
it("sanitization should be idempotent", async () => {
167+
const alreadySanitized = [
168+
{
169+
ts: 20,
170+
type: "say",
171+
say: "text",
172+
text: "A <file_content>[omitted]</file_content> B <content>[omitted]</content>",
173+
},
174+
]
175+
await saveTaskMessages({ messages: alreadySanitized as any[], taskId: "t3", globalStoragePath: "/any" })
176+
expect(writtenData[0].text).toBe("A <file_content>[omitted]</file_content> B <content>[omitted]</content>")
177+
})
178+
})

src/core/task-persistence/taskMessages.ts

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,29 @@ import { getTaskDirectoryPath } from "../../utils/storage"
1414
* We only need to ensure sensitive file payloads are NOT persisted to disk (ui_messages.json).
1515
* Centralizing the sanitization in the persistence layer keeps Task.ts simple and avoids scattering
1616
* redaction logic across multiple call-sites.
17+
*
18+
* Precompiled patterns are hoisted to module scope for clarity and efficiency.
19+
* Precedence: more specific tags are applied first.
1720
*/
21+
const FILE_CONTENT_TAG_RE = /<file_content\b[\s\S]*?<\/file_content>/gi
22+
const CONTENT_TAG_RE = /<content\b[^>]*>[\s\S]*?<\/content>/gi
23+
const FILE_TAG_RE = /<file\b[^>]*>[\s\S]*?<\/file>/gi
24+
const FILES_TAG_RE = /<files\b[^>]*>[\s\S]*?<\/files>/gi
25+
26+
function hasStringText(m: ClineMessage): m is ClineMessage & { text: string } {
27+
return typeof (m as any)?.text === "string"
28+
}
1829

1930
function sanitizeMessageText(text?: string): string | undefined {
2031
if (!text) return text
2132

2233
// Scrub helper that replaces inner contents of known file payload tags with an omission marker
2334
const scrub = (s: string): string => {
2435
// Order matters: scrub more specific tags first
25-
s = s.replace(/<file_content\b[\s\S]*?<\/file_content>/gi, "<file_content>[omitted]</file_content>")
26-
s = s.replace(/<content\b[^>]*>[\s\S]*?<\/content>/gi, "<content>[omitted]</content>")
27-
s = s.replace(/<file\b[^>]*>[\s\S]*?<\/file>/gi, "<file>[omitted]</file>")
28-
s = s.replace(/<files\b[^>]*>[\s\S]*?<\/files>/gi, "<files>[omitted]</files>")
36+
s = s.replace(FILE_CONTENT_TAG_RE, "<file_content>[omitted]</file_content>")
37+
s = s.replace(CONTENT_TAG_RE, "<content>[omitted]</content>")
38+
s = s.replace(FILE_TAG_RE, "<file>[omitted]</file>")
39+
s = s.replace(FILES_TAG_RE, "<files>[omitted]</files>")
2940
return s
3041
}
3142

@@ -45,8 +56,8 @@ function sanitizeMessageText(text?: string): string | undefined {
4556

4657
function sanitizeMessages(messages: ClineMessage[]): ClineMessage[] {
4758
return messages.map((m) => {
48-
if (typeof (m as any).text === "string") {
49-
return { ...m, text: sanitizeMessageText((m as any).text) }
59+
if (hasStringText(m)) {
60+
return { ...m, text: sanitizeMessageText(m.text) }
5061
}
5162
return m
5263
})
@@ -57,6 +68,13 @@ export type ReadTaskMessagesOptions = {
5768
globalStoragePath: string
5869
}
5970

71+
/**
72+
* Note on double-sanitization:
73+
* - The canonical enforcement point is write-time via saveTaskMessages().
74+
* - We also sanitize on read here as a transitional safety net to protect against any
75+
* legacy ui_messages.json that may still contain payloads from older versions.
76+
* This read-time sanitization can be removed in a future version once legacy data is unlikely.
77+
*/
6078
export async function readTaskMessages({
6179
taskId,
6280
globalStoragePath,

0 commit comments

Comments
 (0)