226226TDEXLogSmgrInitWrite (bool encrypt_xlog )
227227{
228228 WalEncryptionKey * key = pg_tde_read_last_wal_key ();
229+ WalLocation start = {.tli = 1 ,.lsn = 0 };
230+ WALKeyCacheRec * keys ;
229231
230232 /*
231233 * Always generate a new key on starting PostgreSQL to protect against
@@ -246,6 +248,14 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
246248 TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
247249 }
248250
251+ keys = pg_tde_get_wal_cache_keys ();
252+
253+ if (keys == NULL )
254+ {
255+ /* TODO cache is empty, try to preread keys from disk */
256+ keys = pg_tde_fetch_wal_keys (start );
257+ }
258+
249259 if (key )
250260 pfree (key );
251261}
@@ -263,6 +273,32 @@ TDEXLogSmgrInitWriteReuseKey()
263273 }
264274}
265275
276+ /*
277+ * Encrypt XLog page(s) from the buf and write to the segment file.
 *
 * Variant of the encrypted write that goes through TDEXLogCryptBuffer()
 * instead of using the current write key directly. The caller,
 * tdeheap_xlog_seg_write(), selects it when the key type is encrypted but
 * the write starts before the last key start LSN, or the last key has no
 * usable start LSN.
 *
 * fd, offset: target file descriptor and byte offset handed to pg_pwrite().
 * buf, count: source data and its length; buf is const and is only read,
 * the bytes are staged in EncryptionBuf before being written.
 * tli, segno, segSize: forwarded unchanged to TDEXLogCryptBuffer().
 *
 * Returns the value returned by pg_pwrite().
278+ */
279+ static ssize_t
280+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
281+ TimeLineID tli , XLogSegNo segno , int segSize )
282+ {
283+ char * enc_buff = EncryptionBuf ;
284+
 /*
 * The staging buffer must be able to hold the whole write. The check is
 * compiled only when FRONTEND is not defined.
 */
285+ #ifndef FRONTEND
286+ Assert (count <= TDEXLogEncryptBuffSize ());
287+ #endif
288+
289+ /*
 * Copy the data as-is, as we might have unencrypted parts.
 * NOTE(review): presumably TDEXLogCryptBuffer() overwrites only the ranges
 * covered by a key and leaves the rest of enc_buff as copied here - confirm
 * against its full body.
 */
290+ memcpy (enc_buff , buf , count );
291+
292+ /*
293+ * This method potentially allocates, but only in very early execution
294+ * Shouldn't happen in a write, where we are in a critical section
 *
 * NOTE(review): the visible part of TDEXLogCryptBuffer() fetches WAL keys
 * from disk when the key cache is empty; presumably that is the allocation
 * meant above - confirm.
 *
 * Input is read from buf, output is written into enc_buff.
295+ */
296+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
297+
 /* Write the staged copy, not the caller buffer */
298+ return pg_pwrite (fd , enc_buff , count , offset );
299+ }
300+
301+
266302/*
267303 * Encrypt XLog page(s) from the buf and write to the segment file.
268304 */
@@ -284,6 +320,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
284320#endif
285321
286322 CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
323+
287324 pg_tde_stream_crypt (iv_prefix ,
288325 offset ,
289326 (char * ) buf ,
@@ -299,26 +336,59 @@ static ssize_t
299336tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
300337 TimeLineID tli , XLogSegNo segno , int segSize )
301338{
 /*
 * Write count bytes of buf to fd at offset, choosing between three paths
 * based on EncryptionKey.type and on where this write starts relative to
 * the last key start LSN: TDEXLogWriteEncryptedPagesOldKeys(),
 * TDEXLogWriteEncryptedPages(), or a plain pg_pwrite().
 *
 * Returns the result of whichever write path was taken.
 *
 * NOTE(review): this text is a diff rendering; lines carrying a "-" marker
 * are the pre-change statements and lines carrying a "+" marker are the
 * post-change statements.
 */
339+ bool lastKeyUsable ;
340+ bool afterLastKey ;
 /*
 * crashRecovery is always false when FRONTEND is defined; otherwise it is
 * true when GetRecoveryState() reports RECOVERY_STATE_CRASH.
 */
341+ #ifdef FRONTEND
342+ bool crashRecovery = false;
343+ #else
344+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
345+ #endif
346+
 /* Location of the start of this write: timeline plus LSN */
347+ WalLocation loc = {.tli = tli };
348+
 /* Fill loc.lsn from the segment number, in-segment offset and segment size */
349+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
350+
302351 /*
303352 * Set the last (most recent) key's start LSN if not set.
304353 *
305354 * This func called with WALWriteLock held, so no need in any extra sync.
306355 */
307- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
308- {
309- WalLocation loc = {.tli = tli };
310356
311- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
 /*
 * lastKeyUsable: the key start LSN is non-zero, i.e. already set.
 * afterLastKey: this write starts at or after the key start LSN.
 * afterLastKey is not recomputed if the location gets set below;
 * NOTE(review): with a zero key LSN it is presumably already true, since
 * zero compares <= any LSN - confirm the LSN type is unsigned.
 */
357+ lastKeyUsable = (TDEXLogGetEncKeyLsn () != 0 );
358+ afterLastKey = (TDEXLogGetEncKeyLsn () <= loc .lsn );
312359
313- pg_tde_wal_last_key_set_location (loc );
314- EncryptionKey .wal_start = loc ;
315- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
360+ if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && !lastKeyUsable )
361+ {
362+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
363+
 /*
 * During crash recovery with a key type other than unencrypted the
 * start location is left unset, so lastKeyUsable stays false and an
 * encrypted write below takes the old-keys path.
 */
364+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED )
365+ {
366+ /*
367+ * TODO: the unencrypted case is still not perfect, we need to
368+ * report an error in some cornercases
369+ */
 /*
 * Only record this write as the key start when there is no last key
 * record, or that record starts before this write.
 */
370+ if (last_key == NULL || last_key -> start .lsn < loc .lsn )
371+ {
372+ pg_tde_wal_last_key_set_location (loc );
373+ EncryptionKey .wal_start = loc ;
374+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
375+ lastKeyUsable = true;
376+ }
377+ }
316378 }
317379
318- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
 /*
 * Encrypted key, but the write starts before the last key or the last key
 * has no usable start: go through the key-cache based path.
 */
380+ if ((!afterLastKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
381+ {
382+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
383+ }
 /* Encrypted key and the write is at or after its start */
384+ else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
385+ {
386+ return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
387+ }
 /* Any other key type: write the caller buffer unchanged */
388+ else
389+ {
390+ return pg_pwrite (fd , buf , count , offset );
391+ }
322392}
323393
324394/*
@@ -340,7 +410,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
340410 if (readsz <= 0 )
341411 return readsz ;
342412
343- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
413+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
344414
345415 return readsz ;
346416}
@@ -349,20 +419,22 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
349419 * [De]Crypt buffer if needed based on provided segment offset, number and TLI
350420 */
351421void
352- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
422+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
353423 TimeLineID tli , XLogSegNo segno , int segSize )
354424{
355425 WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
356426 XLogRecPtr write_key_lsn ;
357427 WalLocation data_end = {.tli = tli };
358428 WalLocation data_start = {.tli = tli };
359429
360- if (! keys )
430+ if (keys == NULL )
361431 {
362432 WalLocation start = {.tli = 1 ,.lsn = 0 };
363433
364434 /* cache is empty, try to read keys from disk */
365- keys = pg_tde_fetch_wal_keys (start );
435+ pg_tde_fetch_wal_keys (start );
436+
437+ keys = pg_tde_get_wal_cache_keys ();
366438 }
367439
368440 /*
@@ -421,6 +493,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
421493 off_t dec_end = XLogSegmentOffset (minlsn , segSize );
422494 size_t dec_sz ;
423495 char * dec_buf = (char * ) buf + (dec_off - offset );
496+ char * o_buf = (char * ) out_buf + (dec_off - offset );
424497
425498 Assert (dec_off >= offset );
426499
@@ -432,17 +505,26 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
432505 dec_end = offset + count ;
433506 }
434507
435- dec_sz = dec_end - dec_off ;
508+ if (dec_end > dec_off )
509+ {
510+ dec_sz = dec_end - dec_off ;
511+ }
512+ else
513+ {
514+ /* assert? */
515+ dec_sz = 0 ;
516+ }
436517
437518#ifdef TDE_XLOG_DEBUG
438519 elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
439520 dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
440521#endif
522+
441523 pg_tde_stream_crypt (iv_prefix ,
442524 dec_off ,
443525 dec_buf ,
444526 dec_sz ,
445- dec_buf ,
527+ o_buf ,
446528 curr_key -> key .key ,
447529 & curr_key -> crypt_ctx );
448530 }
0 commit comments