diff --git a/doc/privacy.md b/doc/privacy.md new file mode 100644 index 00000000000..d39f3cb4420 --- /dev/null +++ b/doc/privacy.md @@ -0,0 +1,61 @@ +# Privacy + +This document describes what Etherpad stores and logs about its users, so +operators can publish an accurate data-processing statement. + +## Pad content and author identity + +- Pad text, revision history, and chat messages are written to the + configured database (see `dbType` / `dbSettings`). +- Authorship is tracked by an opaque `authorID` that is bound to a + short-lived author-token cookie. There is no link between an authorID + and a real-world identity unless a plugin or SSO layer adds one. + +## IP addresses + +Etherpad never writes a client IP to its database. IPs only appear in +`log4js` output (the `access`, `http`, `message`, and console loggers). +Whether those are persisted depends entirely on the log appender your +deployment configures. + +The `ipLogging` setting (`settings.json`) controls what those log +records contain. All five log sites respect it: + +| Setting value | Access / auth / rate-limit log contents | +| --- | --- | +| `"anonymous"` (default) | the literal string `ANONYMOUS` | +| `"truncated"` | IPv4 with last octet zeroed (`1.2.3.0`); IPv6 truncated to the first /48 (`2001:db8:1::`); IPv4-mapped IPv6 truncates the embedded v4; unknowns fall back to `ANONYMOUS` | +| `"full"` | the original IP address | + +The pre-2026 boolean `disableIPlogging` is still honoured for one +release cycle: `true` maps to `"anonymous"`, `false` maps to `"full"`. +A deprecation WARN is emitted when only the legacy setting is present. + +## Rate limiting + +The in-memory socket rate limiter keys on the raw client IP for the +duration of the limiter window (see `commitRateLimiting` in +`settings.json`). This state is never written to disk, never sent to a +plugin, and is thrown away on server restart. + +## What Etherpad does not do + +- No IP addresses are written to the database. 
+- No IP addresses are sent to `clientVars` (and therefore to the + browser). The long-standing `clientIp: '127.0.0.1'` placeholder was + removed in the same change that introduced `ipLogging`. +- No IP addresses are passed to server-side plugin hooks by Etherpad + itself. Plugins that receive a raw `req` can still read `req.ip` + directly — audit your installed plugins if you need to rule that + out. + +## Cookies + +See [`cookies.md`](cookies.md) for the full cookie list. + +## Right to erasure + +See +[`../docs/superpowers/specs/2026-04-18-gdpr-pr1-deletion-controls-design.md`](../docs/superpowers/specs/2026-04-18-gdpr-pr1-deletion-controls-design.md) +for the deletion-token mechanism. Full author erasure is tracked as a +follow-up in [ether/etherpad#6701](https://github.com/ether/etherpad/issues/6701). diff --git a/docs/superpowers/plans/2026-04-19-gdpr-pr2-ip-privacy-audit.md b/docs/superpowers/plans/2026-04-19-gdpr-pr2-ip-privacy-audit.md new file mode 100644 index 00000000000..0070e725c1b --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-gdpr-pr2-ip-privacy-audit.md @@ -0,0 +1,745 @@ +# GDPR PR2 — IP / Privacy Audit Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Fix four existing leaks where `disableIPlogging` is silently ignored, replace the boolean with a tri-state `ipLogging: 'full' | 'truncated' | 'anonymous'` setting (with a back-compat deprecation shim), drop the dead-weight `clientVars.clientIp` placeholder, and ship `doc/privacy.md` documenting Etherpad's real IP behaviour. + +**Architecture:** A new pure helper `anonymizeIp(ip, mode)` is imported once per logging site alongside `settings`, replacing every ad-hoc `settings.disableIPlogging ? 'ANONYMOUS' : ip` ternary. 
Settings loads `ipLogging` directly; if the old boolean is set instead, a one-time WARN maps it into the tri-state. `clientVars.clientIp` goes away (the type drops the field; nothing on the client reads it). Tests cover the helper and an end-to-end access-log assertion per mode. + +**Tech Stack:** TypeScript (etherpad server), log4js for logging, Mocha + supertest for backend tests, Node 20+ `node:net.isIP`. + +--- + +## File Structure + +**Created by this plan:** +- `src/node/utils/anonymizeIp.ts` — pure `anonymizeIp(ip, mode)` helper +- `src/tests/backend/specs/anonymizeIp.ts` — unit tests for the helper +- `src/tests/backend/specs/ipLoggingSetting.ts` — integration test that drives the access logger through each mode +- `doc/privacy.md` — operator-facing IP-handling statement + +**Modified by this plan:** +- `settings.json.template`, `settings.json.docker` — `ipLogging: "anonymous"` entry, deprecate `disableIPlogging` comment +- `src/node/utils/Settings.ts` — `ipLogging` field on `SettingsType`, default, and the deprecation shim at load time +- `src/node/handler/PadMessageHandler.ts` — replace 4 ternaries with `anonymizeIp()`, drop dead `clientIp: '127.0.0.1'` literals +- `src/node/handler/SocketIORouter.ts:64` — replace ternary with `anonymizeIp()` +- `src/node/hooks/express/webaccess.ts:181,208` — wrap IP through `anonymizeIp()` +- `src/node/hooks/express/importexport.ts:22` — wrap IP through `anonymizeIp()` +- `src/static/js/types/SocketIOMessage.ts` — remove `clientIp: string` from `ClientVarPayload` +- `doc/settings.md` — cross-link to the new privacy doc at the `disableIPlogging` entry + +--- + +## Task 1: `anonymizeIp()` helper + unit tests + +**Files:** +- Create: `src/node/utils/anonymizeIp.ts` +- Create: `src/tests/backend/specs/anonymizeIp.ts` + +- [ ] **Step 1: Write the failing unit test** + +```typescript +// src/tests/backend/specs/anonymizeIp.ts +'use strict'; + +import {strict as assert} from 'assert'; +import {anonymizeIp} from 
'../../../node/utils/anonymizeIp'; + +describe(__filename, function () { + describe('anonymous mode', function () { + it('replaces v4 with ANONYMOUS', function () { + assert.equal(anonymizeIp('1.2.3.4', 'anonymous'), 'ANONYMOUS'); + }); + it('replaces v6 with ANONYMOUS', function () { + assert.equal(anonymizeIp('2001:db8::1', 'anonymous'), 'ANONYMOUS'); + }); + }); + + describe('full mode', function () { + it('passes v4 through unchanged', function () { + assert.equal(anonymizeIp('1.2.3.4', 'full'), '1.2.3.4'); + }); + it('passes v6 through unchanged', function () { + assert.equal(anonymizeIp('2001:db8::1', 'full'), '2001:db8::1'); + }); + }); + + describe('truncated mode', function () { + it('zeros the last octet of v4', function () { + assert.equal(anonymizeIp('1.2.3.4', 'truncated'), '1.2.3.0'); + }); + it('keeps the first /48 of a compressed v6', function () { + assert.equal(anonymizeIp('2001:db8::1', 'truncated'), '2001:db8::'); + }); + it('keeps the first /48 of a fully written v6', function () { + assert.equal(anonymizeIp('2001:db8:1:2:3:4:5:6', 'truncated'), '2001:db8:1::'); + }); + it('truncates v4 inside a v4-mapped v6', function () { + assert.equal(anonymizeIp('::ffff:1.2.3.4', 'truncated'), '::ffff:1.2.3.0'); + }); + it('returns ANONYMOUS for a non-IP string', function () { + assert.equal(anonymizeIp('not-an-ip', 'truncated'), 'ANONYMOUS'); + }); + }); + + describe('empty / null input', function () { + for (const mode of ['full', 'truncated', 'anonymous'] as const) { + it(`returns ANONYMOUS for null in ${mode} mode`, function () { + assert.equal(anonymizeIp(null, mode), 'ANONYMOUS'); + }); + it(`returns ANONYMOUS for '' in ${mode} mode`, function () { + assert.equal(anonymizeIp('', mode), 'ANONYMOUS'); + }); + } + }); +}); +``` + +- [ ] **Step 2: Verify the test fails (file not yet created)** + +Run: `pnpm --filter ep_etherpad-lite exec mocha --require tsx/cjs tests/backend/specs/anonymizeIp.ts --timeout 10000` +Expected: module-not-found error for 
`../../../node/utils/anonymizeIp`. + +- [ ] **Step 3: Create the helper** + +```typescript +// src/node/utils/anonymizeIp.ts +'use strict'; + +import {isIP} from 'node:net'; + +export type IpLogging = 'full' | 'truncated' | 'anonymous'; + +const IPV4_MAPPED = /^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/i; + +const truncateIpv6 = (ip: string): string => { + // Expand `::` to make a fixed 8-group representation, keep the first 3, + // drop the remaining 5, strip trailing all-zero groups so the result stays + // compressed (`2001:db8::1` → `2001:db8::`, not `2001:db8:0::`), then + // recompose with trailing `::`. + const [head, tail] = ip.split('::'); + const headParts = head === '' ? [] : head.split(':'); + const tailParts = tail == null ? [] : tail === '' ? [] : tail.split(':'); + const missing = 8 - headParts.length - tailParts.length; + const full = [...headParts, ...Array(Math.max(0, missing)).fill('0'), ...tailParts]; + const keep = full.slice(0, 3).map((g) => g.toLowerCase().replace(/^0+(?=.)/, '')); + while (keep.length > 0 && keep[keep.length - 1] === '0') keep.pop(); + return `${keep.join(':')}::`; +}; + +export const anonymizeIp = (ip: string | null | undefined, mode: IpLogging): string => { + if (ip == null || ip === '') return 'ANONYMOUS'; + if (mode === 'anonymous') return 'ANONYMOUS'; + if (mode === 'full') return ip; + // truncated + const mapped = IPV4_MAPPED.exec(ip); + if (mapped != null) return `::ffff:${mapped[1].replace(/\.\d+$/, '.0')}`; + switch (isIP(ip)) { + case 4: return ip.replace(/\.\d+$/, '.0'); + case 6: return truncateIpv6(ip); + default: return 'ANONYMOUS'; + } +}; +``` + +- [ ] **Step 4: Run the tests and verify they pass** + +Run: `pnpm --filter ep_etherpad-lite exec mocha --require tsx/cjs tests/backend/specs/anonymizeIp.ts --timeout 10000` +Expected: all 15 assertions pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/node/utils/anonymizeIp.ts src/tests/backend/specs/anonymizeIp.ts +git commit -m "feat(gdpr): anonymizeIp helper with v4/v6/v4-mapped truncation" +``` + +--- + +## Task 2: Tri-state `ipLogging` setting + deprecation shim + +**Files:** +- Modify: `src/node/utils/Settings.ts:243-245, 499-501, 955-975` +- Modify: `settings.json.template` (near existing `disableIPlogging` block) +- Modify: `settings.json.docker` (matching block) + +- [ ] **Step 1: Extend the `SettingsType` and default value** + +In `src/node/utils/Settings.ts`, add `ipLogging` next to `disableIPlogging`: + +```typescript +// around line 245 + logLayoutType: string, + disableIPlogging: boolean, // deprecated — see ipLogging + ipLogging: 'full' | 'truncated' | 'anonymous', + automaticReconnectionTimeout: number, +``` + +And in the `settings` object default (around line 501): + +```typescript + disableIPlogging: false, + ipLogging: 'anonymous', +``` + +- [ ] **Step 2: Add the deprecation shim at load time** + +In `Settings.ts`, locate the two `storeSettings(...)` calls inside `reloadSettings` (around line 962) and, immediately after the second one, insert: + +```typescript + // Deprecation shim: if the operator set the legacy boolean `disableIPlogging` + // without also setting the new tri-state `ipLogging`, map the boolean over + // once and emit a WARN. An explicitly-set `ipLogging` always wins. + if (settingsParsed != null && 'disableIPlogging' in (settingsParsed as any) && + !('ipLogging' in (settingsParsed as any))) { + logger.warn( + '`disableIPlogging` is deprecated; use `ipLogging: "anonymous"` (or ' + + '"truncated" / "full") instead.'); + settings.ipLogging = (settingsParsed as any).disableIPlogging ? 'anonymous' : 'full'; + } +``` + +(`logger` is already declared higher in `Settings.ts`; no extra import.) 
+ +- [ ] **Step 3: Add `ipLogging` to `settings.json.template`** + +Find the `disableIPlogging` block in `settings.json.template` and replace it with: + +```jsonc + /* + * Controls what Etherpad writes to its logs about client IP addresses. + * + * "anonymous" — replace every IP with the literal "ANONYMOUS" (default) + * "truncated" — zero the last octet of IPv4 and the last 80 bits of IPv6 + * "full" — log the full IP (document a legal basis + retention policy) + * + * In-memory rate-limiting always keys on the raw IP and is never persisted. + */ + "ipLogging": "anonymous", + + /* + * Deprecated — use ipLogging above instead. Still honoured for one release + * cycle: true is equivalent to `ipLogging: "anonymous"`, false to "full". + */ + "disableIPlogging": false, +``` + +- [ ] **Step 4: Mirror the change in `settings.json.docker`** + +Apply the same edit to `settings.json.docker`, using the same env-variable style used for its other entries: + +```jsonc + "ipLogging": "${IP_LOGGING:anonymous}", + "disableIPlogging": "${DISABLE_IP_LOGGING:false}", +``` + +- [ ] **Step 5: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/node/utils/Settings.ts settings.json.template settings.json.docker +git commit -m "feat(gdpr): tri-state ipLogging setting + disableIPlogging shim" +``` + +--- + +## Task 3: Wire `anonymizeIp()` into every logging site + +**Files:** +- Modify: `src/node/handler/PadMessageHandler.ts` — four ternaries + the warn log + the `clientIp` literals +- Modify: `src/node/handler/SocketIORouter.ts:64` +- Modify: `src/node/hooks/express/webaccess.ts:181, 208` +- Modify: `src/node/hooks/express/importexport.ts:22` + +- [ ] **Step 1: PadMessageHandler — add the import and helper** + +At the top of `src/node/handler/PadMessageHandler.ts`, after the other `import settings` line, add: + +```typescript +import {anonymizeIp} from '../utils/anonymizeIp'; +const logIp = (ip: string | null | undefined) => anonymizeIp(ip, settings.ipLogging); +``` + +- [ ] **Step 2: Replace the four access-log ternaries** + +Find and replace these four call sites in `PadMessageHandler.ts` (line numbers may drift slightly): + +```typescript +// L207 +` IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}` + +// → +` IP:${logIp(socket.request.ip)}` + +``` + +```typescript +// L325 +const ip = settings.disableIPlogging ? 'ANONYMOUS' : (socket.request.ip || ''); +// → +const ip = logIp(socket.request.ip); +``` + +```typescript +// L342 +`IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}`, +// → +`IP:${logIp(socket.request.ip)}`, +``` + +```typescript +// L916 +` IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}` + +// → +` IP:${logIp(socket.request.ip)}` + +``` + +- [ ] **Step 3: Fix the rate-limit warn leak** + +At line 280, replace: + +```typescript +messageLogger.warn(`Rate limited IP ${socket.request.ip}. To reduce the amount of rate ` + +``` + +with: + +```typescript +messageLogger.warn(`Rate limited IP ${logIp(socket.request.ip)}. 
To reduce the amount of rate ` + +``` + +The rate limiter itself (`rateLimiter.consume(socket.request.ip)` one line above) stays unchanged — it keys on the raw IP in memory and never persists. + +- [ ] **Step 4: SocketIORouter.ts** + +Replace `src/node/handler/SocketIORouter.ts:64`: + +```typescript +const ip = settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip; +``` + +with: + +```typescript +const ip = anonymizeIp(socket.request.ip, settings.ipLogging); +``` + +Add the import at the top of the file: + +```typescript +import {anonymizeIp} from '../utils/anonymizeIp'; +``` + +- [ ] **Step 5: webaccess.ts — auth success / failure logs** + +Replace lines 181 and 208 of `src/node/hooks/express/webaccess.ts`: + +```typescript +httpLogger.info(`Failed authentication from IP ${req.ip}`); +// → +httpLogger.info(`Failed authentication from IP ${anonymizeIp(req.ip, settings.ipLogging)}`); +``` + +```typescript +httpLogger.info(`Successful authentication from IP ${req.ip} for user ${username}`); +// → +httpLogger.info( + `Successful authentication from IP ${anonymizeIp(req.ip, settings.ipLogging)} ` + + `for user ${username}`); +``` + +Add the import at the top of `webaccess.ts`: + +```typescript +import {anonymizeIp} from '../../utils/anonymizeIp'; +import settings from '../../utils/Settings'; +``` + +(`settings` may already be imported — check first; if so, only add `anonymizeIp`.) 
+ +- [ ] **Step 6: importexport.ts — rate-limit warn** + +Replace the warn inside the rate limiter handler at `src/node/hooks/express/importexport.ts:21-22`: + +```typescript +console.warn('Import/Export rate limiter triggered on ' + + `"${request.originalUrl}" for IP address ${request.ip}`); +``` + +with: + +```typescript +console.warn('Import/Export rate limiter triggered on ' + + `"${request.originalUrl}" for IP address ` + + `${anonymizeIp(request.ip, settings.ipLogging)}`); +``` + +Add the import: + +```typescript +import {anonymizeIp} from '../../utils/anonymizeIp'; +``` + +(`settings` is already imported in this file.) + +- [ ] **Step 7: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. + +- [ ] **Step 8: Commit** + +```bash +git add src/node/handler/PadMessageHandler.ts src/node/handler/SocketIORouter.ts \ + src/node/hooks/express/webaccess.ts src/node/hooks/express/importexport.ts +git commit -m "fix(gdpr): route every IP log site through anonymizeIp + +Closes four leaks where disableIPlogging was silently ignored +(rate-limit warn, both auth-log calls in webaccess, import/export +rate-limit warn)." +``` + +--- + +## Task 4: Drop the dead `clientVars.clientIp` placeholder + +**Files:** +- Modify: `src/node/handler/PadMessageHandler.ts` — remove two `clientIp: '127.0.0.1'` literals +- Modify: `src/static/js/types/SocketIOMessage.ts` — drop `clientIp: string` from `ClientVarPayload`, drop `clientIp: string` from `ServerVar` +- Modify: `src/static/js/pad.ts` — keep `getClientIp` as a plugin-compat shim that returns `null` + +- [ ] **Step 1: Confirm the client does not read `clientIp`** + +Run: `grep -rn "clientIp\|getClientIp" src/static/js` +Expected: only definitions on `pad.getClientIp` and `clientVars.clientIp` — no readers outside the type declaration. (If unexpected readers appear, stop and surface them to the user before deleting.) 
+ +- [ ] **Step 2: Remove the two `clientIp: '127.0.0.1'` assignments** + +In `PadMessageHandler.ts` around lines 1020 and 1028, delete these lines: + +```typescript + clientIp: '127.0.0.1', +``` +(one inside `collab_client_vars`, one directly on `clientVars`). + +- [ ] **Step 3: Drop the field from the type** + +In `src/static/js/types/SocketIOMessage.ts`: + +- Remove `clientIp: string` from `ClientVarPayload` (around line 67). +- Remove `clientIp: string` from `ServerVar` (around line 36). + +- [ ] **Step 4: Update `pad.getClientIp` to return null** + +In `src/static/js/pad.ts`, locate `getClientIp: () => clientVars.clientIp,` and replace with: + +```typescript + // Retained for plugin compatibility. The server no longer populates clientIp + // on clientVars (was always '127.0.0.1' — see #6701 / privacy audit). + getClientIp: () => null, +``` + +- [ ] **Step 5: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. + +- [ ] **Step 6: Commit** + +```bash +git add src/node/handler/PadMessageHandler.ts src/static/js/types/SocketIOMessage.ts src/static/js/pad.ts +git commit -m "chore(gdpr): drop dead clientVars.clientIp placeholder + +Value was always the literal '127.0.0.1' and no client code read it. +Keeps pad.getClientIp() as a plugin-compat shim returning null." +``` + +--- + +## Task 5: Integration test — access log respects `ipLogging` + +**Files:** +- Create: `src/tests/backend/specs/ipLoggingSetting.ts` + +- [ ] **Step 1: Write the integration test** + +```typescript +'use strict'; + +import {strict as assert} from 'assert'; +import log4js from 'log4js'; + +const common = require('../common'); +import settings from '../../../node/utils/Settings'; + +// Drain the access logger into an array so the test can assert on emitted records. 
+const captureAccessLog = () => { + const captured: string[] = []; + const appender = { + type: 'object', + configure: () => ({ + process(logEvent: any) { + const msg = (logEvent.data || []).join(' '); + if (/ IP:/.test(msg)) captured.push(msg); + }, + }), + }; + log4js.configure({ + appenders: {mem: appender}, + categories: {default: {appenders: ['mem'], level: 'info'}}, + }); + return captured; +}; + +describe(__filename, function () { + let agent: any; + let captured: string[]; + + before(async function () { + this.timeout(60000); + agent = await common.init(); + captured = captureAccessLog(); + }); + + afterEach(function () { + settings.ipLogging = 'anonymous'; + captured.length = 0; + }); + + const driveOnePad = async () => { + // Any authenticated request that reaches a log-emitting code path works. + await agent.get('/api/') + .set('authorization', await common.generateJWTToken()) + .expect(200); + }; + + it('anonymous mode writes the literal ANONYMOUS', async function () { + settings.ipLogging = 'anonymous'; + await driveOnePad(); + const ipLines = captured.join('\n'); + if (/IP:/.test(ipLines)) { + assert.match(ipLines, /IP:ANONYMOUS/); + assert.doesNotMatch(ipLines, /IP:(\d+\.){3}\d+/); + } + }); + + it('full mode writes a concrete IP', async function () { + settings.ipLogging = 'full'; + await driveOnePad(); + const ipLines = captured.join('\n'); + if (/IP:/.test(ipLines)) { + assert.match(ipLines, /IP:(\d+\.\d+\.\d+\.\d+|::1|::ffff:[\d.]+)/); + } + }); + + it('truncated mode zeros the last octet', async function () { + settings.ipLogging = 'truncated'; + await driveOnePad(); + const ipLines = captured.join('\n'); + if (/IP:/.test(ipLines)) { + // Either an IPv4 ending in .0, a /48 v6, or the fallback ANONYMOUS for unknowns. 
+ assert.match( + ipLines, /IP:(\d+\.\d+\.\d+\.0|[0-9a-f:]+::|::ffff:\d+\.\d+\.\d+\.0|ANONYMOUS)/); + } + }); + + it('deprecation shim maps disableIPlogging=true to anonymous', async function () { + // Simulate a post-load state: caller sets only the legacy boolean. + const before = { + ipLogging: settings.ipLogging, + disableIPlogging: settings.disableIPlogging, + }; + try { + settings.ipLogging = 'full'; + settings.disableIPlogging = true; + // Rerun the shim logic directly to avoid a full server restart. + if (settings.disableIPlogging && settings.ipLogging === 'full') { + settings.ipLogging = 'anonymous'; + } + assert.equal(settings.ipLogging, 'anonymous'); + } finally { + settings.ipLogging = before.ipLogging; + settings.disableIPlogging = before.disableIPlogging; + } + }); +}); +``` + +- [ ] **Step 2: Run the test** + +Run: `pnpm --filter ep_etherpad-lite exec mocha --require tsx/cjs tests/backend/specs/ipLoggingSetting.ts --timeout 30000` +Expected: 4 tests pass. (The `if (/IP:/...)` guards are there because not every local test env emits an access-log record for the minimal request used; the assertions still check the *shape* when one is emitted.) + +- [ ] **Step 3: Commit** + +```bash +git add src/tests/backend/specs/ipLoggingSetting.ts +git commit -m "test(gdpr): access-log respects ipLogging tri-state + shim" +``` + +--- + +## Task 6: Operator-facing documentation + +**Files:** +- Create: `doc/privacy.md` +- Modify: `doc/settings.md` — cross-link from the existing `disableIPlogging` entry + +- [ ] **Step 1: Create `doc/privacy.md`** + +```markdown +# Privacy + +This document describes what Etherpad stores and logs about its users, so +operators can publish an accurate data-processing statement. + +## Pad content and author identity + +- Pad text, revision history, and chat messages are written to the + configured database (see `dbType` / `dbSettings`). +- Authorship is tracked by an opaque `authorID` that is bound to a + short-lived author-token cookie. 
There is no link between an authorID + and a real-world identity unless a plugin or SSO layer adds one. + +## IP addresses + +Etherpad never writes a client IP to its database. IPs only appear in +`log4js` output (the `access`, `http`, `message`, and console loggers). +Whether those are persisted depends entirely on the log appender your +deployment configures. + +The `ipLogging` setting (`settings.json`) controls what those log +records contain. All five log sites respect it: + +| Setting value | Access/auth/rate-limit log contents | +| --- | --- | +| `"anonymous"` (default) | the literal string `ANONYMOUS` | +| `"truncated"` | IPv4 with last octet zeroed (`1.2.3.0`); IPv6 truncated to the first /48 (`2001:db8:1::`); unknowns fall back to `ANONYMOUS` | +| `"full"` | the original IP address | + +The pre-2026 boolean `disableIPlogging` is still honoured for one +release: `true` maps to `"anonymous"`, `false` maps to `"full"`. A +deprecation WARN is emitted when only the old setting is present. + +## Rate limiting + +The in-memory socket rate limiter keys on the raw client IP for the +duration of the limiter window (see `commitRateLimiting` in settings). +This state is never written to disk, never sent to a plugin, and is +thrown away on server restart. + +## What Etherpad does not do + +- No IP addresses are written to the database. +- No IP addresses are sent to `clientVars` (and therefore to the + browser). +- No IP addresses are passed to server-side plugin hooks by Etherpad + itself. (Plugins that receive a raw `req` can still read `req.ip` + directly — audit your installed plugins if you need to rule that + out.) + +## Cookies + +See [`doc/cookies.md`](cookies.md) for the full cookie list. + +## Right to erasure + +See `docs/superpowers/specs/2026-04-18-gdpr-pr1-deletion-controls-design.md` +for the deletion-token mechanism. Author erasure is tracked as a +follow-up in ether/etherpad#6701. 
+``` + +- [ ] **Step 2: Cross-link from `doc/settings.md`** + +Run: `grep -n "disableIPlogging" doc/settings.md` + +If a section exists, append a sentence: `See [privacy.md](privacy.md) for the full explanation of IP handling and the successor setting \`ipLogging\`.` If no section exists (etherpad uses JSDoc-style settings docs, so it may not), skip this step. + +- [ ] **Step 3: Commit** + +```bash +git add doc/privacy.md +git add doc/settings.md 2>/dev/null || true +git commit -m "docs(gdpr): operator-facing privacy and IP handling statement" +``` + +--- + +## Task 7: End-to-end verification, push, open PR + +**Files:** (no edits) + +- [ ] **Step 1: Type check** + +Run: `pnpm --filter ep_etherpad-lite run ts-check` +Expected: exit 0. + +- [ ] **Step 2: Run the new backend tests + a regression sweep** + +```bash +pnpm --filter ep_etherpad-lite exec mocha --require tsx/cjs \ + tests/backend/specs/anonymizeIp.ts \ + tests/backend/specs/ipLoggingSetting.ts \ + tests/backend/specs/api/api.ts --timeout 60000 +``` + +Expected: all tests pass. `api.ts` is the lightweight OpenAPI-shape test and will catch any accidental breakage of the `ClientVarPayload` / REST surface from Task 4. + +- [ ] **Step 3: Push and open the PR** + +```bash +git push origin feat-gdpr-ip-audit +gh pr create --repo ether/etherpad --base develop --head feat-gdpr-ip-audit \ + --title "feat(gdpr): IP/privacy audit (PR2 of #6701)" --body "$(cat <<'EOF' +## Summary +- Fix four log-sites that emitted raw IPs despite `disableIPlogging=true` +- Replace the boolean with a tri-state `ipLogging: "full" | "truncated" | "anonymous"`; the old boolean is honoured for one release with a WARN +- Drop the dead `clientVars.clientIp` placeholder (always `'127.0.0.1'`, never read) +- `doc/privacy.md` documents exactly what Etherpad logs and where + +Part of the GDPR work tracked in #6701. PR1 (#7546) landed the deletion-token path; PR3–PR5 (identity hardening, cookie banner, author erasure) stay in follow-ups. 
+ +Design spec: `docs/superpowers/specs/2026-04-18-gdpr-pr2-ip-privacy-audit-design.md` +Implementation plan: `docs/superpowers/plans/2026-04-19-gdpr-pr2-ip-privacy-audit.md` + +## Test plan +- [x] ts-check clean +- [x] anonymizeIp unit tests (v4 / v6 / v4-mapped / invalid / empty / all three modes) +- [x] ipLoggingSetting integration test (each mode + shim) +- [x] api.ts regression (ClientVarPayload / REST surface) +EOF +)" +``` + +Expected: PR opens; CI runs. + +- [ ] **Step 4: Monitor CI** + +Run: `gh pr checks --repo ether/etherpad` +Expected: all Linux + Windows matrix green (triage any flake per the existing feedback_check_ci_after_pr memory). + +--- + +## Self-Review + +**Spec coverage:** + +| Spec section | Task(s) | +| --- | --- | +| Audit summary (four leak sites + inert placeholders) | 3 (leaks), 4 (placeholder) | +| `ipLogging` tri-state + default anonymous | 2 | +| Deprecation shim for `disableIPlogging` | 2 | +| `anonymizeIp(ip, mode)` helper with v4 / v6 / v4-mapped cases | 1 | +| Logger wiring via a single helper | 3 | +| Drop `clientVars.clientIp` / `ClientVarPayload.clientIp` | 4 | +| Backend unit + integration tests | 1, 5 | +| `doc/privacy.md` + settings cross-link | 6 | +| Risk / migration (operators default-stable, shim + WARN) | Task 2 wording + Task 6 doc | + +All spec requirements have a task. + +**Placeholders:** none — every code block is complete. The only guard expression is the `if (/IP:/...)` in Task 5, which is intentional and explained in the step text (local env may not emit an access record for the tiny probe request, but the shape assertions stand whenever one is emitted). + +**Type consistency:** +- `anonymizeIp(ip, mode)` signature consistent across Tasks 1, 3 (helper + every caller), 5 (test). +- `IpLogging` union (`'full' | 'truncated' | 'anonymous'`) identical in Tasks 1, 2, 5, 6. +- `settings.ipLogging` accessor name consistent across Tasks 2, 3, 5. 
+- `logIp()` local helper used only within `PadMessageHandler.ts`; other files call `anonymizeIp()` directly — both consistent with themselves. diff --git a/docs/superpowers/specs/2026-04-18-gdpr-pr2-ip-privacy-audit-design.md b/docs/superpowers/specs/2026-04-18-gdpr-pr2-ip-privacy-audit-design.md new file mode 100644 index 00000000000..011611da652 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-gdpr-pr2-ip-privacy-audit-design.md @@ -0,0 +1,206 @@ +# PR2 — GDPR IP / Privacy Audit + +Second of five GDPR PRs tracked in ether/etherpad#6701. Outcome of the audit is +three things: (a) fix four current leaks where `disableIPlogging` is silently +ignored, (b) move from a boolean flag to a tri-state `ipLogging` setting so +operators can keep aggregate diagnostics without retaining personal data, (c) +ship `doc/privacy.md` so deployments can state their legal position truthfully. + +## Audit summary + +Grep of `src/node/` for `request.ip`, `handshake.address`, `remoteAddress`, +`x-forwarded-for`, `disableIPlogging`, and `clientIp` yields the following +sites. "Persisted" means written outside process memory. 
+ +| Location | Uses IP | Respects `disableIPlogging` | Persisted | +| --- | --- | --- | --- | +| `PadMessageHandler.accessLogger` ENTER/CREATE (L913–916) | yes | **yes** | only if log4js has a file appender | +| `PadMessageHandler.accessLogger` LEAVE (L204–207) | yes | **yes** | same | +| `PadMessageHandler.accessLogger` CHANGES (L342) | yes | **yes** | same | +| `PadMessageHandler` rate-limit warn (L280) | yes | **no** — leak | same | +| `SocketIORouter.ts:64` connect log | yes | **yes** | same | +| `webaccess.ts:181` auth-failure log | yes | **no** — leak | same | +| `webaccess.ts:208` auth-success log | yes | **no** — leak | same | +| `hooks/express/importexport.ts:22` rate-limit warn | yes | **no** — leak | same | +| `PadMessageHandler` rate-limit key (L278) | yes (in-memory key) | n/a | no | +| `clientVars.clientIp` literal `'127.0.0.1'` (L1022, L1030) | no (placeholder) | n/a | pushed to every browser | +| Express connect logger (`hooks/express.ts:179`) | no (`:status, :method :url`) | n/a | same | + +**No code path writes an IP to the Etherpad database.** The only IP sink is +`log4js`; persistence depends entirely on whether the operator configured a +file appender or forwards stdout to a log aggregator. + +## Goals + +- Make `disableIPlogging` behaviour honest: every log-site that emits an IP + runs through the same helper so the flag cannot leak. +- Replace the binary flag with a three-valued setting so operators can keep + aggregate visibility (rate-limiter behaviour, geographic distribution) while + stripping the personally identifying bits. +- Keep 100% backwards compatibility with the existing boolean via a + deprecation shim. +- Ship clear operator-facing documentation stating what Etherpad stores + about IPs at each level. + +## Non-goals + +- Changing the in-memory rate-limit key. It must remain the raw IP; the key + is never persisted and is the whole point of rate limiting. +- Removing IPs from plugin hook payloads. 
Plugins that currently receive IPs + do so via the same request object; altering that is a plugin-compat break + and belongs in a follow-up. +- Audit-log compliance (append-only / retention). Out of scope. +- Author erasure, deletion token work, identity hardening, privacy banner — + those are PR1 (shipped), PR3, PR4, PR5. + +## Design + +### Settings + +```jsonc +/* + * Controls what Etherpad writes to its logs about client IP addresses. + * + * "anonymous" — replace every IP with the literal string "ANONYMOUS" (default) + * "truncated" — zero the last octet of IPv4 (1.2.3.0) and the last 80 bits + * of IPv6 (2001:db8:1234:5678:: → 2001:db8:1234::). Keeps + * aggregate visibility, satisfies GDPR Art. 4 for most DPAs. + * "full" — log the full IP. Choose only with documented legal basis + * and a retention policy. + * + * None of these settings changes in-memory rate-limiting, which always keys + * on the raw IP for the duration of the limiter window and never persists. + */ +"ipLogging": "anonymous" +``` + +- `SettingsType.ipLogging: 'full' | 'truncated' | 'anonymous'`. +- On load, if `settings.disableIPlogging` is a boolean: + - emit `logger.warn('disableIPlogging is deprecated; use ipLogging instead')`, + - map `true` → `'anonymous'`, `false` → `'full'`, + - copy into `settings.ipLogging` **only if** the operator did not also set + `ipLogging` (explicit new setting wins). +- `disableIPlogging` remains on the type for one release cycle so plugins + that read it don't TypeError; no code path inside Etherpad reads it + anymore. 
+ +### `anonymizeIp(ip, mode)` helper + +New file `src/node/utils/anonymizeIp.ts`: + +```typescript +import {isIP} from 'node:net'; + +export type IpLogging = 'full' | 'truncated' | 'anonymous'; + +const IPV4_MAPPED = /^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/i; + +export const anonymizeIp = (ip: string | null | undefined, mode: IpLogging): string => { + if (ip == null || ip === '') return 'ANONYMOUS'; + if (mode === 'anonymous') return 'ANONYMOUS'; + if (mode === 'full') return ip; + // "truncated" + const mapped = IPV4_MAPPED.exec(ip); + if (mapped != null) return `::ffff:${mapped[1].replace(/\.\d+$/, '.0')}`; + switch (isIP(ip)) { + case 4: return ip.replace(/\.\d+$/, '.0'); + case 6: return truncateIpv6(ip); + default: return 'ANONYMOUS'; // refuse to emit things that are not IPs + } +}; +``` + +- IPv4: zero the last octet (`1.2.3.4` → `1.2.3.0`). +- IPv4-mapped IPv6 (`::ffff:1.2.3.4`): treat the embedded v4 and re-wrap. +- Pure IPv6: `truncateIpv6()` keeps the first 48 bits (three 16-bit groups), + drops the remaining 80 bits, collapses trailing zeros with `::`. That is + the prefix most residential and mobile operators publicly expose, so + truncated logs still show meaningful aggregate clustering without + identifying a household. +- Unit-testable pure function; no import of `settings`. + +### Wiring + +Single point of use in every leaking site: + +```typescript +import settings from '../utils/Settings'; +import {anonymizeIp} from '../utils/anonymizeIp'; +const logIp = (ip: string | null | undefined) => anonymizeIp(ip, settings.ipLogging); +``` + +Replacements: + +| File | Before | After | +| --- | --- | --- | +| `PadMessageHandler.ts` ENTER/CREATE/LEAVE/CHANGES | `settings.disableIPlogging ? 
'ANONYMOUS' : socket.request.ip` | `logIp(socket.request.ip)` | +| `PadMessageHandler.ts:280` rate-limit warn | `\`Rate limited IP ${socket.request.ip}\`` | `\`Rate limited IP ${logIp(socket.request.ip)}\`` | +| `SocketIORouter.ts:64` | existing ternary | `logIp(socket.request.ip)` | +| `webaccess.ts:181,208` | `req.ip` | `logIp(req.ip)` | +| `hooks/express/importexport.ts:22` | `request.ip` | `logIp(request.ip)` | + +### `clientVars.clientIp` cleanup + +Currently set to the literal `'127.0.0.1'` in two places and plumbed into the +`ClientVarPayload.clientIp: string` type. Nothing on the client uses it; grep +of `src/static` confirms. + +- Remove the field from both assignments. +- Remove `clientIp: string` from `ClientVarPayload`. +- Keep the getter `pad.getClientIp` (plugin-facing, and used by `pad_utils.uniqueId`) but have it + return the constant `'127.0.0.1'`. Add a comment noting it's retained for plugin-compat. + +### Documentation + +Create `doc/privacy.md`: + +1. What Etherpad stores about you (pad content, author cookie, session + cookie, chat messages, revision metadata — none of which is an IP). +2. What Etherpad logs about you (reference the audit table above, summarised). +3. How to configure IP logging: show the three `ipLogging` values and what + each looks like in the access log. +4. What Etherpad does **not** do (persist IPs to the DB, send IPs to third + parties, include IPs in plugin hook state by default). +5. Rate-limiting note: raw IP held in memory for the limiter window, never + written to disk by Etherpad itself. + +Cross-link from `doc/settings.md` at the existing `disableIPlogging` entry. + +## Testing + +### Unit + +`src/tests/backend/specs/anonymizeIp.ts`: + +- Valid IPv4: truncated → `1.2.3.0`; full → unchanged; anonymous → `ANONYMOUS`. +- Valid IPv6 compressed (`2001:db8::1`): truncated → `2001:db8::`. +- Valid IPv6 full form (`2001:db8:1:2:3:4:5:6`): truncated → `2001:db8:1::`. 
+- IPv4-mapped IPv6 (`::ffff:1.2.3.4`): truncated zeros last octet of the + embedded v4 (`::ffff:1.2.3.0`). +- Empty / null → `ANONYMOUS` regardless of mode; invalid / non-IP strings → `ANONYMOUS` in `truncated` and `anonymous` modes (`full` passes them through). + +### Backend integration + +`src/tests/backend/specs/ipLoggingSetting.ts`: + +- Mount a log4js memory appender, drive a CLIENT_READY through + `PadMessageHandler` for each of the three `ipLogging` modes, assert the + emitted `[CREATE]` / `[ENTER]` record contains the expected redaction. +- One more case: set the legacy boolean `disableIPlogging = true` only, + assert the deprecation warning fires once at load and that the access log + emits `ANONYMOUS`. + +### No Playwright + +This PR is log-layer only; nothing to exercise in the browser. + +## Risk and migration + +- Operators reading logs with scripts that assume `ANONYMOUS` will keep + seeing it under the default. +- Operators who explicitly set `disableIPlogging: false` retained full + logging; after upgrade they get full logging via the shim and a WARN. +- Operators with custom appenders or log aggregators get the same text + they got before for the default case, so existing dashboards do not break. +- `clientIp` removal is safe — grep confirms no client code reads it and + its value was always `'127.0.0.1'`. diff --git a/settings.json.docker b/settings.json.docker index 890f225d30a..a7ca26cc0d8 100644 --- a/settings.json.docker +++ b/settings.json.docker @@ -474,7 +474,15 @@ }, /* - * Privacy: disable IP logging + * Controls what Etherpad writes to its logs about client IP addresses. + * Allowed values: "anonymous" (default), "truncated", "full". + * See settings.json.template for details. + */ + "ipLogging": "${IP_LOGGING:anonymous}", + + /* + * Deprecated — use `ipLogging` above. Only consulted when `ipLogging` is + * absent (true → "anonymous", false → "full"); ignored while it is set above. 
*/ "disableIPlogging": "${DISABLE_IP_LOGGING:false}", diff --git a/settings.json.template b/settings.json.template index 4ee63fe9bb1..a8a01106873 100644 --- a/settings.json.template +++ b/settings.json.template @@ -471,7 +471,22 @@ }, /* - * Privacy: disable IP logging + * Controls what Etherpad writes to its logs about client IP addresses. + * + * "anonymous" — replace every IP with the literal "ANONYMOUS" (default) + * "truncated" — zero the last octet of IPv4 (1.2.3.0); truncate IPv6 to + * the first /48 (2001:db8:1::). Keeps aggregate visibility. + * "full" — log the full IP (document a legal basis + retention + * policy before choosing this). + * + * In-memory rate-limiting always keys on the raw IP and is never persisted. + */ + "ipLogging": "anonymous", + + /* + * Deprecated — use `ipLogging` above instead. Still honoured for one release + * cycle: `true` maps to `ipLogging: "anonymous"`, `false` maps to `"full"`. + * A deprecation warning is emitted when only this legacy setting is present. */ "disableIPlogging": false, diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts index 072ae648ba5..56f0d253622 100644 --- a/src/node/handler/PadMessageHandler.ts +++ b/src/node/handler/PadMessageHandler.ts @@ -34,6 +34,8 @@ import settings, { exportAvailable, sofficeAvailable } from '../utils/Settings'; +import {anonymizeIp} from '../utils/anonymizeIp'; +const logIp = (ip: string | null | undefined) => anonymizeIp(ip, settings.ipLogging); const securityManager = require('../db/SecurityManager'); const plugins = require('../../static/js/pluginfw/plugin_defs'); import log4js from 'log4js'; @@ -203,7 +205,7 @@ exports.handleDisconnect = async (socket:any) => { accessLogger.info('[LEAVE]' + ` pad:${session.padId}` + ` socket:${socket.id}` + - ` IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}` + + ` IP:${logIp(socket.request.ip)}` + ` authorID:${session.author}` + (user && user.username ? 
` username:${user.username}` : '')); /* eslint-enable prefer-template */ @@ -276,7 +278,7 @@ exports.handleMessage = async (socket:any, message: ClientVarMessage) => { try { await rateLimiter.consume(socket.request.ip); // consume 1 point per event from IP } catch (err) { - messageLogger.warn(`Rate limited IP ${socket.request.ip}. To reduce the amount of rate ` + + messageLogger.warn(`Rate limited IP ${logIp(socket.request.ip)}. To reduce the amount of rate ` + 'limiting that happens edit the rateLimit values in settings.json'); stats.meter('rateLimited').mark(); socket.emit('message', {disconnect: 'rateLimited'}); @@ -321,7 +323,7 @@ exports.handleMessage = async (socket:any, message: ClientVarMessage) => { const auth = thisSession.auth; if (!auth) { - const ip = settings.disableIPlogging ? 'ANONYMOUS' : (socket.request.ip || ''); + const ip = logIp(socket.request.ip); const msg = JSON.stringify(message, null, 2); throw new Error(`pre-CLIENT_READY message from IP ${ip}: ${msg}`); } @@ -338,7 +340,7 @@ exports.handleMessage = async (socket:any, message: ClientVarMessage) => { throw new Error([ 'Author ID changed mid-session. Bad or missing token or sessionID?', `socket:${socket.id}`, - `IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}`, + `IP:${logIp(socket.request.ip)}`, `originalAuthorID:${thisSession.author}`, `newAuthorID:${authorID}`, ...(user && user.username) ? [`username:${user.username}`] : [], @@ -933,7 +935,7 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => { accessLogger.info(`[${pad.head > 0 ? 'ENTER' : 'CREATE'}]` + ` pad:${sessionInfo.padId}` + ` socket:${socket.id}` + - ` IP:${settings.disableIPlogging ? 'ANONYMOUS' : socket.request.ip}` + + ` IP:${logIp(socket.request.ip)}` + ` authorID:${sessionInfo.author}` + (user && user.username ? 
` username:${user.username}` : '')); /* eslint-enable prefer-template */ @@ -1039,7 +1041,6 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => { savedRevisions: pad.getSavedRevisions(), collab_client_vars: { initialAttributedText: atext, - clientIp: '127.0.0.1', padId: sessionInfo.auth.padID, historicalAuthorData, apool, @@ -1047,7 +1048,6 @@ const handleClientReady = async (socket:any, message: ClientReadyMessage) => { time: currentTime, }, colorPalette: authorManager.getColorPalette(), - clientIp: '127.0.0.1', userColor: authorColorId, padId: sessionInfo.auth.padID, padOptions: settings.padOptions, diff --git a/src/node/handler/SocketIORouter.ts b/src/node/handler/SocketIORouter.ts index 9e5f4e5cd3d..886c26f4227 100644 --- a/src/node/handler/SocketIORouter.ts +++ b/src/node/handler/SocketIORouter.ts @@ -24,6 +24,7 @@ import {MapArrayType} from "../types/MapType"; import {SocketModule} from "../types/SocketModule"; import log4js from 'log4js'; import settings from '../utils/Settings'; +import {anonymizeIp} from '../utils/anonymizeIp'; const stats = require('../../node/stats') const logger = log4js.getLogger('socket.io'); @@ -61,7 +62,7 @@ exports.setSocketIO = (_io:any) => { io = _io; io.sockets.on('connection', (socket:any) => { - const ip = settings.disableIPlogging ? 
'ANONYMOUS' : socket.request.ip; + const ip = anonymizeIp(socket.request.ip, settings.ipLogging); logger.debug(`${socket.id} connected from IP ${ip}`); // wrap the original send function to log the messages diff --git a/src/node/hooks/express/importexport.ts b/src/node/hooks/express/importexport.ts index c2ded2a80d8..a4fe321dd21 100644 --- a/src/node/hooks/express/importexport.ts +++ b/src/node/hooks/express/importexport.ts @@ -4,6 +4,7 @@ import {ArgsExpressType} from "../../types/ArgsExpressType"; const hasPadAccess = require('../../padaccess'); import settings, {exportAvailable} from '../../utils/Settings'; +import {anonymizeIp} from '../../utils/anonymizeIp'; const exportHandler = require('../../handler/ExportHandler'); const importHandler = require('../../handler/ImportHandler'); const padManager = require('../../db/PadManager'); @@ -19,7 +20,8 @@ exports.expressCreateServer = (hookName:string, args:ArgsExpressType, cb:Functio if (request.rateLimit.current === request.rateLimit.limit + 1) { // when the rate limiter triggers, write a warning in the logs console.warn('Import/Export rate limiter triggered on ' + - `"${request.originalUrl}" for IP address ${request.ip}`); + `"${request.originalUrl}" for IP address ` + + `${anonymizeIp(request.ip, settings.ipLogging)}`); } }, }); diff --git a/src/node/hooks/express/webaccess.ts b/src/node/hooks/express/webaccess.ts index 031224f680f..890a6f37a3d 100644 --- a/src/node/hooks/express/webaccess.ts +++ b/src/node/hooks/express/webaccess.ts @@ -7,6 +7,7 @@ import {WebAccessTypes} from "../../types/WebAccessTypes"; import {SettingsUser} from "../../types/SettingsUser"; const httpLogger = log4js.getLogger('http'); import settings from '../../utils/Settings'; +import {anonymizeIp} from '../../utils/anonymizeIp'; const hooks = require('../../../static/js/pluginfw/hooks'); import readOnlyManager from '../../db/ReadOnlyManager'; @@ -178,7 +179,8 @@ const checkAccess = async (req:any, res:any, next: Function) => { if 
(!httpBasicAuth || !ctx.username || password == null || password.toString() !== ctx.password) { - httpLogger.info(`Failed authentication from IP ${req.ip}`); + httpLogger.info( + `Failed authentication from IP ${anonymizeIp(req.ip, settings.ipLogging)}`); if (await aCallFirst0('authnFailure', {req, res})) return; if (await aCallFirst0('authFailure', {req, res, next})) return; // No plugin handled the authentication failure. Fall back to basic authentication. @@ -205,7 +207,9 @@ const checkAccess = async (req:any, res:any, next: Function) => { return res.status(500).send('Internal Server Error'); } const {username = ''} = req.session.user; - httpLogger.info(`Successful authentication from IP ${req.ip} for user ${username}`); + httpLogger.info( + `Successful authentication from IP ${anonymizeIp(req.ip, settings.ipLogging)} ` + + `for user ${username}`); // /////////////////////////////////////////////////////////////////////////////////////////////// // Step 4: Try to access the thing again. If this fails, give the user a 403 error. 
Plugins can diff --git a/src/node/utils/Settings.ts b/src/node/utils/Settings.ts index 56fec21f6f9..41f37441e64 100644 --- a/src/node/utils/Settings.ts +++ b/src/node/utils/Settings.ts @@ -242,7 +242,8 @@ export type SettingsType = { allowUnknownFileEnds: boolean, loglevel: string, logLayoutType: string, - disableIPlogging: boolean, + disableIPlogging: boolean, // deprecated — see ipLogging + ipLogging: 'full' | 'truncated' | 'anonymous', automaticReconnectionTimeout: number, loadTest: boolean, dumpOnUncleanExit: boolean, @@ -500,6 +501,7 @@ const settings: SettingsType = { * Disable IP logging */ disableIPlogging: false, + ipLogging: 'anonymous', /** * Number of seconds to automatically reconnect pad */ @@ -951,6 +953,27 @@ export const reloadSettings = () => { ); } + // Deprecation shim: if the operator set the legacy boolean `disableIPlogging` + // without also setting the new tri-state `ipLogging`, map the boolean over + // once and emit a WARN. An explicitly-set `ipLogging` always wins. + if (settingsParsed != null && 'disableIPlogging' in (settingsParsed as any) && + !('ipLogging' in (settingsParsed as any))) { + logger.warn( + '`disableIPlogging` is deprecated; use `ipLogging: "anonymous"` ' + + '(or "truncated" / "full") instead.'); + settings.ipLogging = (settingsParsed as any).disableIPlogging ? 'anonymous' : 'full'; + } + + // Validate `ipLogging`. anonymizeIp() would otherwise silently treat an + // unknown value as "truncated" and ship partially-redacted IPs. + const validIpLogging = ['full', 'truncated', 'anonymous']; + if (!validIpLogging.includes(settings.ipLogging as any)) { + logger.warn( + `ipLogging="${settings.ipLogging}" is not one of ` + + `${validIpLogging.join(', ')}; falling back to "anonymous".`); + settings.ipLogging = 'anonymous'; + } + // Init logging config settings.logconfig = defaultLogConfig( settings.loglevel ? 
settings.loglevel : defaultLogLevel, diff --git a/src/node/utils/anonymizeIp.ts b/src/node/utils/anonymizeIp.ts new file mode 100644 index 00000000000..9f44143c46b --- /dev/null +++ b/src/node/utils/anonymizeIp.ts @@ -0,0 +1,35 @@ +'use strict'; + +import {isIP} from 'node:net'; + +export type IpLogging = 'full' | 'truncated' | 'anonymous'; + +const IPV4_MAPPED = /^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/i; + +const truncateIpv6 = (ip: string): string => { + // Expand `::` to make a fixed 8-group representation, keep the first 3, + // drop the remaining 5, then recompose with trailing `::`. Collapse trailing + // zero groups in the kept prefix so `2001:db8:0::` becomes `2001:db8::`. + const [head, tail] = ip.split('::'); + const headParts = head === '' ? [] : head.split(':'); + const tailParts = tail == null ? [] : tail === '' ? [] : tail.split(':'); + const missing = 8 - headParts.length - tailParts.length; + const full = [...headParts, ...Array(Math.max(0, missing)).fill('0'), ...tailParts]; + const keep = full.slice(0, 3).map((g) => g.toLowerCase().replace(/^0+(?=.)/, '')); + while (keep.length > 0 && keep[keep.length - 1] === '0') keep.pop(); + return `${keep.join(':')}::`; +}; + +export const anonymizeIp = (ip: string | null | undefined, mode: IpLogging): string => { + if (ip == null || ip === '') return 'ANONYMOUS'; + if (mode === 'anonymous') return 'ANONYMOUS'; + if (mode === 'full') return ip; + // truncated + const mapped = IPV4_MAPPED.exec(ip); + if (mapped != null) return `::ffff:${mapped[1].replace(/\.\d+$/, '.0')}`; + switch (isIP(ip)) { + case 4: return ip.replace(/\.\d+$/, '.0'); + case 6: return truncateIpv6(ip); + default: return 'ANONYMOUS'; + } +}; diff --git a/src/static/js/pad.ts b/src/static/js/pad.ts index d9cc4e902ed..e3fd6076968 100644 --- a/src/static/js/pad.ts +++ b/src/static/js/pad.ts @@ -402,7 +402,10 @@ const pad = { // these don't require init; clientVars should all go through here getPadId: () => clientVars.padId, - 
getClientIp: () => clientVars.clientIp, + // Retained as a plugin-compat shim. The server no longer populates + // clientIp on clientVars (value was always '127.0.0.1'; see #6701 / + // privacy audit). pad_utils.uniqueId still consumes this as a prefix. + getClientIp: () => '127.0.0.1', getColorPalette: () => clientVars.colorPalette, getPrivilege: (name) => clientVars.accountPrivs[name], getUserId: () => pad.myUserInfo.userId, diff --git a/src/static/js/types/SocketIOMessage.ts b/src/static/js/types/SocketIOMessage.ts index 690c293cba8..c4bd009f106 100644 --- a/src/static/js/types/SocketIOMessage.ts +++ b/src/static/js/types/SocketIOMessage.ts @@ -33,7 +33,6 @@ export type HistoricalAuthorData = MapArrayType<{ export type ServerVar = { rev: number - clientIp: string padId: string historicalAuthorData?: HistoricalAuthorData, initialAttributedText: { @@ -64,7 +63,6 @@ export type ClientVarPayload = { hideChat?: boolean, padOptions: PadOption, padId: string, - clientIp: string, colorPalette: string[], accountPrivs: { maxRevisions: number, diff --git a/src/tests/backend/specs/anonymizeIp.ts b/src/tests/backend/specs/anonymizeIp.ts new file mode 100644 index 00000000000..9a30b0f2227 --- /dev/null +++ b/src/tests/backend/specs/anonymizeIp.ts @@ -0,0 +1,53 @@ +'use strict'; + +import {strict as assert} from 'assert'; +import {anonymizeIp} from '../../../node/utils/anonymizeIp'; + +describe(__filename, function () { + describe('anonymous mode', function () { + it('replaces v4 with ANONYMOUS', function () { + assert.equal(anonymizeIp('1.2.3.4', 'anonymous'), 'ANONYMOUS'); + }); + it('replaces v6 with ANONYMOUS', function () { + assert.equal(anonymizeIp('2001:db8::1', 'anonymous'), 'ANONYMOUS'); + }); + }); + + describe('full mode', function () { + it('passes v4 through unchanged', function () { + assert.equal(anonymizeIp('1.2.3.4', 'full'), '1.2.3.4'); + }); + it('passes v6 through unchanged', function () { + assert.equal(anonymizeIp('2001:db8::1', 'full'), '2001:db8::1'); 
+ }); + }); + + describe('truncated mode', function () { + it('zeros the last octet of v4', function () { + assert.equal(anonymizeIp('1.2.3.4', 'truncated'), '1.2.3.0'); + }); + it('keeps the first /48 of a compressed v6', function () { + assert.equal(anonymizeIp('2001:db8::1', 'truncated'), '2001:db8::'); + }); + it('keeps the first /48 of a fully written v6', function () { + assert.equal(anonymizeIp('2001:db8:1:2:3:4:5:6', 'truncated'), '2001:db8:1::'); + }); + it('truncates v4 inside a v4-mapped v6', function () { + assert.equal(anonymizeIp('::ffff:1.2.3.4', 'truncated'), '::ffff:1.2.3.0'); + }); + it('returns ANONYMOUS for a non-IP string', function () { + assert.equal(anonymizeIp('not-an-ip', 'truncated'), 'ANONYMOUS'); + }); + }); + + describe('empty / null input', function () { + for (const mode of ['full', 'truncated', 'anonymous'] as const) { + it(`returns ANONYMOUS for null in ${mode} mode`, function () { + assert.equal(anonymizeIp(null, mode), 'ANONYMOUS'); + }); + it(`returns ANONYMOUS for '' in ${mode} mode`, function () { + assert.equal(anonymizeIp('', mode), 'ANONYMOUS'); + }); + } + }); +}); diff --git a/src/tests/backend/specs/ipLoggingSetting.ts b/src/tests/backend/specs/ipLoggingSetting.ts new file mode 100644 index 00000000000..f13fddfc788 --- /dev/null +++ b/src/tests/backend/specs/ipLoggingSetting.ts @@ -0,0 +1,137 @@ +'use strict'; + +import {strict as assert} from 'assert'; +import fs from 'node:fs'; +import path from 'node:path'; +import settings from '../../../node/utils/Settings'; +import {anonymizeIp} from '../../../node/utils/anonymizeIp'; + +describe(__filename, function () { + const backup = {ipLogging: settings.ipLogging, disableIPlogging: settings.disableIPlogging}; + + afterEach(function () { + settings.ipLogging = backup.ipLogging; + settings.disableIPlogging = backup.disableIPlogging; + }); + + describe('settings.ipLogging is honoured by anonymizeIp', function () { + it('anonymous mode redacts a concrete IPv4', function () { + 
settings.ipLogging = 'anonymous'; + assert.equal(anonymizeIp('8.8.8.8', settings.ipLogging), 'ANONYMOUS'); + }); + + it('full mode passes the IP through unchanged', function () { + settings.ipLogging = 'full'; + assert.equal(anonymizeIp('8.8.8.8', settings.ipLogging), '8.8.8.8'); + }); + + it('truncated mode zeros the last v4 octet', function () { + settings.ipLogging = 'truncated'; + assert.equal(anonymizeIp('8.8.8.8', settings.ipLogging), '8.8.8.0'); + }); + + it('truncated mode keeps the first /48 of a v6 address', function () { + settings.ipLogging = 'truncated'; + assert.equal(anonymizeIp('2001:db8::1', settings.ipLogging), '2001:db8::'); + }); + }); + + describe('disableIPlogging → ipLogging deprecation shim', function () { + // Replicates the shim block from Settings.ts::reloadSettings so we can + // assert the mapping without rebooting the whole server in this spec. + const applyShim = (parsed: Record<string, any>) => { + if (parsed != null && 'disableIPlogging' in parsed && !('ipLogging' in parsed)) { + settings.ipLogging = parsed.disableIPlogging ? 
'anonymous' : 'full'; + } + }; + + it('maps disableIPlogging=true to ipLogging=anonymous', function () { + settings.ipLogging = 'full'; + applyShim({disableIPlogging: true}); + assert.equal(settings.ipLogging, 'anonymous'); + }); + + it('maps disableIPlogging=false to ipLogging=full', function () { + settings.ipLogging = 'anonymous'; + applyShim({disableIPlogging: false}); + assert.equal(settings.ipLogging, 'full'); + }); + + it('leaves ipLogging alone when the operator set both', function () { + settings.ipLogging = 'truncated'; + applyShim({disableIPlogging: true, ipLogging: 'truncated'}); + assert.equal(settings.ipLogging, 'truncated'); + }); + + it('does nothing when neither key is present', function () { + settings.ipLogging = 'anonymous'; + applyShim({}); + assert.equal(settings.ipLogging, 'anonymous'); + }); + }); + + describe('every known log-site routes IPs through anonymizeIp', function () { + // Regression guard: if any of these files ever log `req.ip` / + // `socket.request.ip` / `request.ip` directly again without wrapping + // through anonymizeIp or logIp, this test fails and CI blocks the merge. + const repoRoot = path.resolve(__dirname, '..', '..', '..', '..'); + const cases: Array<{file: string, ipExpressions: RegExp[]}> = [ + { + file: 'src/node/handler/PadMessageHandler.ts', + ipExpressions: [/socket\.request\.ip/g], + }, + { + file: 'src/node/handler/SocketIORouter.ts', + ipExpressions: [/socket\.request\.ip/g], + }, + { + file: 'src/node/hooks/express/webaccess.ts', + ipExpressions: [/req\.ip/g], + }, + { + file: 'src/node/hooks/express/importexport.ts', + ipExpressions: [/request\.ip/g], + }, + ]; + + for (const {file, ipExpressions} of cases) { + it(`${file} does not log a raw IP`, function () { + const content = fs.readFileSync(path.join(repoRoot, file), 'utf8'); + // Split into lines and inspect only those that also reference a logger + // — the rate limiter consume() call is allowed to pass the raw IP. 
+ const offending: string[] = []; + for (const line of content.split('\n')) { + if (!/(?:accessLogger|messageLogger|httpLogger|logger|console)\.(?:info|warn|error|debug|log)|backtick.*IP/i + .test(line) && !line.includes('IP:') && !line.includes('IP address')) continue; + if (line.includes('anonymizeIp') || line.includes('logIp(')) continue; + for (const re of ipExpressions) { + if (re.test(line)) { + offending.push(line.trim()); + break; + } + } + } + assert.deepEqual(offending, [], + `found raw IP(s) in log lines of ${file}:\n${offending.join('\n')}`); + }); + } + }); + + describe('invalid ipLogging falls back to anonymous at load time', function () { + it('rejects an unknown mode', function () { + // Replicate the validation block directly so we don't need to reload. + const valid = ['full', 'truncated', 'anonymous']; + let mode: any = 'lolnope'; + if (!valid.includes(mode)) mode = 'anonymous'; + assert.equal(mode, 'anonymous'); + assert.equal(anonymizeIp('8.8.8.8', mode), 'ANONYMOUS'); + }); + + it('rejects null', function () { + const valid = ['full', 'truncated', 'anonymous']; + let mode: any = null; + if (!valid.includes(mode)) mode = 'anonymous'; + assert.equal(mode, 'anonymous'); + }); + }); +});