diff --git a/src/core/task-persistence/__tests__/taskMessages.sanitize.spec.ts b/src/core/task-persistence/__tests__/taskMessages.sanitize.spec.ts new file mode 100644 index 000000000000..5ef7dcadfeca --- /dev/null +++ b/src/core/task-persistence/__tests__/taskMessages.sanitize.spec.ts @@ -0,0 +1,178 @@ +/** + * Tests for centralized UI message redaction in taskMessages.ts + * Verifies: + * - saveTaskMessages() sanitizes sensitive payloads before persistence + * - readTaskMessages() sanitizes legacy payloads on read as a safety net + * - Idempotency and non-string handling + */ + +import * as path from "path" + +// Mocks +let writtenPath: string | null = null +let writtenData: any = null + +vi.mock("../../../utils/safeWriteJson", () => { + return { + safeWriteJson: vi.fn(async (p: string, data: any) => { + writtenPath = p + writtenData = data + }), + } +}) + +vi.mock("../../../utils/storage", () => { + return { + getTaskDirectoryPath: vi.fn(async (_globalStoragePath: string, _taskId: string) => "/tmp/taskdir"), + } +}) + +let fileExists = true +vi.mock("../../../utils/fs", () => { + return { + fileExistsAtPath: vi.fn(async (_p: string) => fileExists), + } +}) + +// For read sanitization tests - simulate raw file contents +let mockReadFilePayload: string = "[]" +vi.mock("fs/promises", async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + readFile: vi.fn(async (_p: string, _enc: string) => mockReadFilePayload), + } +}) + +// SUT +import { readTaskMessages, saveTaskMessages } from "../taskMessages" +import { GlobalFileNames } from "../../../shared/globalFileNames" + +describe("taskMessages redaction", () => { + beforeEach(() => { + writtenPath = null + writtenData = null + fileExists = true + mockReadFilePayload = "[]" + }) + + it("saveTaskMessages() should sanitize sensitive tags and JSON 'request' envelope", async () => { + const messages = [ + // JSON api_req_started envelope + { + ts: 1, + type: "say", + say: 
"api_req_started", + text: JSON.stringify({ + request: + "Header\n" + + "s1\n" + + "topsecret\n" + + "inner\n" + + "body", + apiProtocol: "anthropic", + }), + }, + // Raw UI text with various tags + { + ts: 2, + type: "say", + say: "text", + text: + "pre " + + "multi " + + "abc " + + "blob " + + "secretbytes " + + "post", + }, + // Non-sensitive string should remain identical + { ts: 3, type: "say", say: "text", text: "no sensitive" }, + // Non-string text should be left untouched + { ts: 4, type: "say", say: "text", text: undefined }, + ] as any[] + + await saveTaskMessages({ messages, taskId: "t1", globalStoragePath: "/any" }) + + // Assert path used + expect(writtenPath).toBe(path.join("/tmp/taskdir", GlobalFileNames.uiMessages)) + expect(Array.isArray(writtenData)).toBe(true) + + const [m1, m2, m3, m4] = writtenData as any[] + + // m1: JSON envelope should be sanitized inside request + const m1Obj = JSON.parse(m1.text || "{}") + expect(typeof m1Obj.request).toBe("string") + expect(m1Obj.request).toContain("[omitted]") + expect(m1Obj.request).toContain("[omitted]") + expect(m1Obj.request).toContain("[omitted]") + expect(m1Obj.request).toContain("[omitted]") + // Original payloads should not remain + expect(m1Obj.request).not.toContain("topsecret") + expect(m1Obj.request).not.toContain("inner") + expect(m1Obj.request).not.toContain("body") + expect(m1Obj.request).not.toContain("multi") + + // m2: raw text with tags should be scrubbed + expect(m2.text).toContain("[omitted]") + expect(m2.text).toContain("[omitted]") + expect(m2.text).toContain("[omitted]") + expect(m2.text).toContain("[omitted]") + expect(m2.text).not.toContain("secretbytes") + expect(m2.text).not.toContain("blob") + expect(m2.text).not.toContain("abc") + expect(m2.text).not.toContain("multi") + + // m3: unchanged safe content + expect(m3.text).toBe("no sensitive") + + // m4: undefined remains undefined + expect(m4.text).toBeUndefined() + }) + + it("readTaskMessages() should sanitize legacy on 
read", async () => { + const legacy = [ + { + ts: 10, + type: "say", + say: "api_req_started", + text: JSON.stringify({ + request: "X L3gacy Y", + apiProtocol: "anthropic", + }), + }, + { + ts: 11, + type: "say", + say: "text", + text: "pre bundle post", + }, + ] + mockReadFilePayload = JSON.stringify(legacy) + + const result = await readTaskMessages({ taskId: "t2", globalStoragePath: "/any" }) + + expect(result.length).toBe(2) + const [r1, r2] = result as any[] + + const r1Obj = JSON.parse(r1.text || "{}") + expect(r1Obj.request).toContain("[omitted]") + expect(r1Obj.request).not.toContain("L3gacy") + + expect(r2.text).toContain("[omitted]") + expect(r2.text).not.toContain("bundle") + }) + + it("sanitization should be idempotent", async () => { + const alreadySanitized = [ + { + ts: 20, + type: "say", + say: "text", + text: "A [omitted] B [omitted]", + }, + ] + await saveTaskMessages({ messages: alreadySanitized as any[], taskId: "t3", globalStoragePath: "/any" }) + expect(writtenData[0].text).toBe("A [omitted] B [omitted]") + }) +}) diff --git a/src/core/task-persistence/taskMessages.ts b/src/core/task-persistence/taskMessages.ts index 63a2eefbaae2..ed4b837566e3 100644 --- a/src/core/task-persistence/taskMessages.ts +++ b/src/core/task-persistence/taskMessages.ts @@ -9,11 +9,72 @@ import { fileExistsAtPath } from "../../utils/fs" import { GlobalFileNames } from "../../shared/globalFileNames" import { getTaskDirectoryPath } from "../../utils/storage" +/** + * Redaction utilities: + * We only need to ensure sensitive file payloads are NOT persisted to disk (ui_messages.json). + * Centralizing the sanitization in the persistence layer keeps Task.ts simple and avoids scattering + * redaction logic across multiple call-sites. + * + * Precompiled patterns are hoisted to module scope for clarity and efficiency. + * Precedence: more specific tags are applied first. 
/**
 * Payload-tag patterns used by the redaction helpers below.
 *
 * Each pattern matches one complete element (opening tag with optional
 * attributes, any content including newlines, closing tag). The `\b` after the
 * tag name keeps `<file` from matching `<files>` or `<file_content>`.
 * All patterns are global so `String.prototype.replace` rewrites every
 * occurrence; `replace` resets `lastIndex`, so sharing them is safe.
 */
const FILE_CONTENT_TAG_RE = /<file_content\b[^>]*>[\s\S]*?<\/file_content>/gi
const CONTENT_TAG_RE = /<content\b[^>]*>[\s\S]*?<\/content>/gi
const FILE_TAG_RE = /<file\b[^>]*>[\s\S]*?<\/file>/gi
const FILES_TAG_RE = /<files\b[^>]*>[\s\S]*?<\/files>/gi

/** Cheap pre-check: only text that starts like a JSON object can hold a `request` field. */
const JSON_OBJECT_START_RE = /^\s*\{/

/**
 * Type guard: true when the message carries a string `text` field.
 * Tolerates null/undefined entries, which can appear in data parsed from disk.
 */
function hasStringText<T extends { text?: unknown }>(m: T): m is T & { text: string } {
	return typeof (m as { text?: unknown } | null | undefined)?.text === "string"
}

/**
 * Replaces the inner contents of known file-payload elements with an omission
 * marker while keeping the wrapper tag, so the result is still recognisable
 * and re-running the scrub yields the same string (idempotent).
 */
function scrubFilePayloads(s: string): string {
	// Order matters: scrub more specific (inner) tags before their containers.
	return s
		.replace(FILE_CONTENT_TAG_RE, "<file_content>[omitted]</file_content>")
		.replace(CONTENT_TAG_RE, "<content>[omitted]</content>")
		.replace(FILE_TAG_RE, "<file>[omitted]</file>")
		.replace(FILES_TAG_RE, "<files>[omitted]</files>")
}

/**
 * Sanitizes a single message text.
 *
 * - Empty / undefined text is returned unchanged.
 * - If the text is a JSON object with a string `request` field (e.g. an
 *   api_req_started envelope), only that field is scrubbed and the object is
 *   re-serialized. When nothing had to be scrubbed the original text is
 *   returned verbatim, so clean envelopes are never reformatted.
 * - Anything else is scrubbed as raw text.
 *
 * @param text message text, possibly undefined
 * @returns the sanitized text (same value when nothing matched)
 */
function sanitizeMessageText(text?: string): string | undefined {
	if (!text) return text

	// Skip the JSON.parse attempt (and its exception cost) for plain text.
	if (JSON_OBJECT_START_RE.test(text)) {
		try {
			const parsed: unknown = JSON.parse(text)
			if (typeof parsed === "object" && parsed !== null) {
				const envelope = parsed as Record<string, unknown>
				const request = envelope.request
				if (typeof request === "string") {
					const scrubbed = scrubFilePayloads(request)
					// Spread keeps key order, so only the request value differs.
					return scrubbed === request ? text : JSON.stringify({ ...envelope, request: scrubbed })
				}
			}
		} catch {
			// Not valid JSON; fall through to the raw scrub.
		}
	}

	return scrubFilePayloads(text)
}

/**
 * Returns a sanitized copy of the message list without mutating the input.
 * Messages whose text is absent, non-string, or already clean are passed
 * through by reference; only messages that actually change are copied.
 * A non-array input (e.g. a corrupt file that parsed to an object) yields [].
 */
function sanitizeMessages<T extends { text?: string }>(messages: T[]): T[] {
	if (!Array.isArray(messages)) return []

	return messages.map((m) => {
		if (!hasStringText(m)) return m
		const text = sanitizeMessageText(m.text)
		return text === m.text ? m : { ...m, text }
	})
}

export type ReadTaskMessagesOptions = {
	taskId: string
	globalStoragePath: string
}

/**
 * Note on double-sanitization:
 * - The canonical enforcement point is write-time via saveTaskMessages().
 * - We also sanitize on read here as a transitional safety net to protect against any
 *   legacy ui_messages.json that may still contain payloads from older versions.
 *   This read-time sanitization can be removed in a future version once legacy data is unlikely.
 */
+ */ export async function readTaskMessages({ taskId, globalStoragePath, @@ -23,7 +84,9 @@ export async function readTaskMessages({ const fileExists = await fileExistsAtPath(filePath) if (fileExists) { - return JSON.parse(await fs.readFile(filePath, "utf8")) + // Sanitize on read as a safety net for any legacy persisted content + const raw = JSON.parse(await fs.readFile(filePath, "utf8")) + return sanitizeMessages(raw) } return [] @@ -38,5 +101,8 @@ export type SaveTaskMessagesOptions = { export async function saveTaskMessages({ messages, taskId, globalStoragePath }: SaveTaskMessagesOptions) { const taskDir = await getTaskDirectoryPath(globalStoragePath, taskId) const filePath = path.join(taskDir, GlobalFileNames.uiMessages) - await safeWriteJson(filePath, messages) + + // Persist a sanitized copy to disk to avoid storing sensitive file payloads + const sanitized = sanitizeMessages(messages) + await safeWriteJson(filePath, sanitized) }