Skip to content

Commit 4b934b9

Browse files
committed
PG-1605 take 2: only switch keys after recovery ended
Alternatively, switch keys once we have progressed at least one page past the previous key. This ensures that even replicas, which are always in recovery, start using a new key after a small delay, while still leaving a gap for crash recovery.
1 parent 9183672 commit 4b934b9

File tree

6 files changed

+721
-18
lines changed

6 files changed

+721
-18
lines changed

contrib/pg_tde/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ tap_tests = [
127127
't/wal_archiving.pl',
128128
't/wal_encrypt.pl',
129129
't/wal_key_tli.pl',
130+
't/059_tde_2pc_replication.pl',
130131
]
131132

132133
tests += {

contrib/pg_tde/src/access/pg_tde_xlog_smgr.c

Lines changed: 91 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ static WalEncryptionKey EncryptionKey =
4848
.wal_start = {.tli = 0,.lsn = InvalidXLogRecPtr},
4949
};
5050

51+
/*
 * Debug helper: render the first 16 bytes at iv_prefix as 32 lowercase hex
 * digits, followed by a terminating NUL.
 *
 * iv_prefix must point to at least 16 readable bytes and out_hex must have
 * room for at least 33 bytes.
 */
static void
iv_prefix_debug(const char *iv_prefix, char *out_hex)
{
	static const char hex_digits[] = "0123456789abcdef";

	for (int i = 0; i < 16; ++i)
	{
		/*
		 * Convert through unsigned char: where plain char is signed, a byte
		 * >= 0x80 would otherwise be sign-extended and no longer fit in the
		 * two output characters reserved for it.
		 */
		unsigned char byte = (unsigned char) iv_prefix[i];

		out_hex[i * 2] = hex_digits[byte >> 4];
		out_hex[i * 2 + 1] = hex_digits[byte & 0x0F];
	}
	out_hex[32] = '\0';
}
60+
5161
/*
5262
* Must be the same as in replication/walsender.c
5363
*
@@ -248,6 +258,13 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
248258
TDEXLogSetEncKeyLocation(EncryptionKey.wal_start);
249259
}
250260

261+
{
262+
WalLocation start = {.tli = 1,.lsn = 0};
263+
264+
/* TODO cache is empty, try to preread keys from disk */
265+
//pg_tde_fetch_wal_keys(start);
266+
}
267+
251268
if (key)
252269
pfree(key);
253270
}
@@ -265,6 +282,27 @@ TDEXLogSmgrInitWriteReuseKey()
265282
}
266283
}
267284

285+
/*
286+
* Encrypt XLog page(s) from the buf and write to the segment file.
287+
*/
288+
static ssize_t
289+
TDEXLogWriteEncryptedPagesOldKeys(int fd, const void *buf, size_t count, off_t offset,
290+
TimeLineID tli, XLogSegNo segno, int segSize)
291+
{
292+
char *enc_buff = EncryptionBuf;
293+
294+
#ifndef FRONTEND
295+
Assert(count <= TDEXLogEncryptBuffSize());
296+
#endif
297+
298+
/* This method potentially allocates, but only in very early execution
299+
Shouldn't happen in a write, where we are in a critical section */
300+
TDEXLogCryptBuffer(buf, enc_buff, count, offset, tli, segno, segSize);
301+
302+
return pg_pwrite(fd, enc_buff, count, offset);
303+
}
304+
305+
268306
/*
269307
* Encrypt XLog page(s) from the buf and write to the segment file.
270308
*/
@@ -280,12 +318,17 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
280318
Assert(count <= TDEXLogEncryptBuffSize());
281319
#endif
282320

321+
CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
322+
283323
#ifdef TDE_XLOG_DEBUG
284-
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %u_%X/%X",
285-
count, offset, offset, LSN_FORMAT_ARGS(segno), key->wal_start.tli, LSN_FORMAT_ARGS(key->wal_start.lsn));
324+
char tbuf[33], tbuf2[33], tbuf3[33];
325+
iv_prefix_debug(key->key, tbuf);
326+
iv_prefix_debug(iv_prefix, tbuf2);
327+
iv_prefix_debug(buf, tbuf3);
328+
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %u_%X/%X %s %s %s",
329+
count, offset, offset, LSN_FORMAT_ARGS(segno), key->wal_start.tli, LSN_FORMAT_ARGS(key->wal_start.lsn), tbuf, tbuf2, tbuf3);
286330
#endif
287331

288-
CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
289332
pg_tde_stream_crypt(iv_prefix,
290333
offset,
291334
(char *) buf,
@@ -301,26 +344,53 @@ static ssize_t
301344
tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
302345
TimeLineID tli, XLogSegNo segno, int segSize)
303346
{
347+
#ifdef FRONTEND
348+
bool recoveryInProgress = false;
349+
#else
350+
bool recoveryInProgress = RecoveryInProgress();
351+
#endif
304352
/*
305353
* Set the last (most recent) key's start LSN if not set.
306354
*
307355
* This func called with WALWriteLock held, so no need in any extra sync.
308356
*/
309-
if (EncryptionKey.type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn() == 0)
357+
358+
bool lastKeyUsable = (TDEXLogGetEncKeyLsn() != 0);
359+
360+
if (EncryptionKey.type != WAL_KEY_TYPE_INVALID && !lastKeyUsable)
310361
{
311362
WalLocation loc = {.tli = tli};
312363

313364
XLogSegNoOffsetToRecPtr(segno, offset, segSize, loc.lsn);
314365

366+
if(!recoveryInProgress || EncryptionKey.type == WAL_KEY_TYPE_UNENCRYPTED) {
367+
// TODO: detect if we have conflicting later keys in case of unencrypted, and error out
315368
pg_tde_wal_last_key_set_location(loc);
316369
EncryptionKey.wal_start = loc;
317370
TDEXLogSetEncKeyLocation(EncryptionKey.wal_start);
371+
lastKeyUsable = true;
372+
} else {
373+
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
374+
if(last_key) {
375+
if (wal_location_cmp(last_key->start, loc) < -8192) {
376+
// If we progressed at least one more page, it is safe to switch keys
377+
pg_tde_wal_last_key_set_location(loc);
378+
EncryptionKey.wal_start = loc;
379+
TDEXLogSetEncKeyLocation(EncryptionKey.wal_start);
380+
lastKeyUsable = true;
381+
}
382+
}
383+
}
318384
}
319385

320-
if (EncryptionKey.type == WAL_KEY_TYPE_ENCRYPTED)
321-
return TDEXLogWriteEncryptedPages(fd, buf, count, offset, tli, segno);
322-
else
386+
if(!lastKeyUsable && EncryptionKey.type == WAL_KEY_TYPE_ENCRYPTED) {
387+
return TDEXLogWriteEncryptedPagesOldKeys(fd, buf, count, offset, tli, segno, segSize);
388+
} else if (EncryptionKey.type == WAL_KEY_TYPE_ENCRYPTED) {
389+
return TDEXLogWriteEncryptedPages(fd, buf, count, offset, tli, segno);
390+
}
391+
else {
323392
return pg_pwrite(fd, buf, count, offset);
393+
}
324394
}
325395

326396
/*
@@ -342,7 +412,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
342412
if (readsz <= 0)
343413
return readsz;
344414

345-
TDEXLogCryptBuffer(buf, count, offset, tli, segno, segSize);
415+
TDEXLogCryptBuffer(buf, buf, count, offset, tli, segno, segSize);
346416

347417
return readsz;
348418
}
@@ -351,7 +421,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
351421
* [De]Crypt buffer if needed based on provided segment offset, number and TLI
352422
*/
353423
void
354-
TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
424+
TDEXLogCryptBuffer(const void *buf, void *out_buf, size_t count, off_t offset,
355425
TimeLineID tli, XLogSegNo segno, int segSize)
356426
{
357427
WALKeyCacheRec *keys = pg_tde_get_wal_cache_keys();
@@ -423,6 +493,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
423493
off_t dec_end = XLogSegmentOffset(minlsn, segSize);
424494
size_t dec_sz;
425495
char *dec_buf = (char *) buf + (dec_off - offset);
496+
char *o_buf = (char *) out_buf + (dec_off - offset);
426497

427498
Assert(dec_off >= offset);
428499

@@ -436,18 +507,23 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
436507

437508
dec_sz = dec_end - dec_off;
438509

439-
#ifdef TDE_XLOG_DEBUG
440-
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X",
441-
dec_off, dec_off - offset, dec_sz, curr_key->key.wal_start.tli, LSN_FORMAT_ARGS(curr_key->key.wal_start.lsn));
442-
#endif
510+
443511
pg_tde_stream_crypt(iv_prefix,
444512
dec_off,
445513
dec_buf,
446514
dec_sz,
447-
dec_buf,
515+
o_buf,
448516
curr_key->key.key,
449517
&curr_key->crypt_ctx);
450-
}
518+
#ifdef TDE_XLOG_DEBUG
519+
char tbuf[33], tbuf2[33], tbuf3[33];
520+
iv_prefix_debug(curr_key->key.key, tbuf);
521+
iv_prefix_debug(iv_prefix, tbuf2);
522+
iv_prefix_debug(o_buf, tbuf3);
523+
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X %s %s %s",
524+
dec_off, dec_off - offset, dec_sz, curr_key->key.wal_start.tli, LSN_FORMAT_ARGS(curr_key->key.wal_start.lsn), tbuf, tbuf2, tbuf3);
525+
#endif
526+
}
451527
}
452528
}
453529
}

contrib/pg_tde/src/include/access/pg_tde_xlog_smgr.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ extern void TDEXLogSmgrInit(void);
1313
extern void TDEXLogSmgrInitWrite(bool encrypt_xlog);
1414
extern void TDEXLogSmgrInitWriteReuseKey(void);
1515

16-
extern void TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
16+
extern void TDEXLogCryptBuffer(const void *buf, void *out_buf, size_t count, off_t offset,
1717
TimeLineID tli, XLogSegNo segno, int segSize);
1818

1919
#endif /* PG_TDE_XLOGSMGR_H */

contrib/pg_tde/src/include/pg_tde_defines.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77

88
/* #define ENCRYPTION_DEBUG 1 */
99
/* #define KEYRING_DEBUG 1 */
10-
/* #define TDE_XLOG_DEBUG 1 */
10+
/*
 * Keep disabled by default: when TDE_XLOG_DEBUG is defined, the WAL write and
 * decrypt paths log hex dumps of key bytes and page contents at DEBUG1.
 * Enable only in local debugging builds.
 */
/* #define TDE_XLOG_DEBUG 1 */
1111

1212
#endif /* PG_TDE_DEFINES_H */

0 commit comments

Comments
 (0)