228228TDEXLogSmgrInitWrite (bool encrypt_xlog )
229229{
230230 WalEncryptionKey * key = pg_tde_read_last_wal_key ();
231+ WalLocation start = {.tli = 1 ,.lsn = 0 };
232+ WALKeyCacheRec * keys ;
231233
232234 /*
233235 * Always generate a new key on starting PostgreSQL to protect against
@@ -248,6 +250,14 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
248250 TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
249251 }
250252
253+ keys = pg_tde_get_wal_cache_keys ();
254+
255+ if (keys == NULL )
256+ {
257+ /* TODO cache is empty, try to preread keys from disk */
258+ keys = pg_tde_fetch_wal_keys (start );
259+ }
260+
251261 if (key )
252262 pfree (key );
253263}
@@ -265,6 +275,27 @@ TDEXLogSmgrInitWriteReuseKey()
265275 }
266276}
267277
278+ /*
279+ * Encrypt XLog page(s) from the buf and write to the segment file.
280+ */
281+ static ssize_t
282+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
283+ TimeLineID tli , XLogSegNo segno , int segSize )
284+ {
285+ char * enc_buff = EncryptionBuf ;
286+
287+ #ifndef FRONTEND
288+ Assert (count <= TDEXLogEncryptBuffSize ());
289+ #endif
290+
291+ /* This method potentially allocates, but only in very early execution
292+ Shouldn't happen in a write, where we are in a critical section */
293+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
294+
295+ return pg_pwrite (fd , enc_buff , count , offset );
296+ }
297+
298+
268299/*
269300 * Encrypt XLog page(s) from the buf and write to the segment file.
270301 */
@@ -286,6 +317,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
286317#endif
287318
288319 CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
320+
289321 pg_tde_stream_crypt (iv_prefix ,
290322 offset ,
291323 (char * ) buf ,
@@ -301,26 +333,49 @@ static ssize_t
301333tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
302334 TimeLineID tli , XLogSegNo segno , int segSize )
303335{
336+ bool lastKeyUsable ;
337+ bool afterLastKey ;
338+ #ifdef FRONTEND
339+ bool crashRecovery = false;
340+ #else
341+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
342+ #endif
343+
344+ WalLocation loc = {.tli = tli };
345+
346+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
347+
304348 /*
305349 * Set the last (most recent) key's start LSN if not set.
306350 *
307351 * This func called with WALWriteLock held, so no need in any extra sync.
308352 */
309- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
310- {
311- WalLocation loc = {.tli = tli };
312353
313- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
354+ lastKeyUsable = (TDEXLogGetEncKeyLsn () != 0 );
355+ afterLastKey = (TDEXLogGetEncKeyLsn () <= loc .lsn );
314356
315- pg_tde_wal_last_key_set_location (loc );
316- EncryptionKey .wal_start = loc ;
317- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
357+ if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && !lastKeyUsable )
358+ {
359+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
360+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED ) {
361+ /* TODO: the unencrypted case is still not perfect, we need to report an error in some cornercases */
362+ if (last_key == NULL || last_key -> start .lsn < loc .lsn ) {
363+ pg_tde_wal_last_key_set_location (loc );
364+ EncryptionKey .wal_start = loc ;
365+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
366+ lastKeyUsable = true;
367+ }
368+ }
318369 }
319370
320- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
321- return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
322- else
371+ if ((!afterLastKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED ) {
372+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
373+ } else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED ) {
374+ return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
375+ }
376+ else {
323377 return pg_pwrite (fd , buf , count , offset );
378+ }
324379}
325380
326381/*
@@ -342,7 +397,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
342397 if (readsz <= 0 )
343398 return readsz ;
344399
345- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
400+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
346401
347402 return readsz ;
348403}
@@ -351,20 +406,22 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
351406 * [De]Crypt buffer if needed based on provided segment offset, number and TLI
352407 */
353408void
354- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
409+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
355410 TimeLineID tli , XLogSegNo segno , int segSize )
356411{
357412 WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
358413 XLogRecPtr write_key_lsn ;
359414 WalLocation data_end = {.tli = tli };
360415 WalLocation data_start = {.tli = tli };
361416
362- if (! keys )
417+ if (keys == NULL )
363418 {
364419 WalLocation start = {.tli = 1 ,.lsn = 0 };
365420
366421 /* cache is empty, try to read keys from disk */
367- keys = pg_tde_fetch_wal_keys (start );
422+ pg_tde_fetch_wal_keys (start );
423+
424+ keys = pg_tde_get_wal_cache_keys ();
368425 }
369426
370427 /*
@@ -423,6 +480,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
423480 off_t dec_end = XLogSegmentOffset (minlsn , segSize );
424481 size_t dec_sz ;
425482 char * dec_buf = (char * ) buf + (dec_off - offset );
483+ char * o_buf = (char * ) out_buf + (dec_off - offset );
426484
427485 Assert (dec_off >= offset );
428486
@@ -434,20 +492,26 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
434492 dec_end = offset + count ;
435493 }
436494
437- dec_sz = dec_end - dec_off ;
495+ if (dec_end > dec_off ) {
496+ dec_sz = dec_end - dec_off ;
497+ } else {
498+ // assert?
499+ dec_sz = 0 ;
500+ }
438501
439502#ifdef TDE_XLOG_DEBUG
440503 elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
441504 dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
442505#endif
506+
443507 pg_tde_stream_crypt (iv_prefix ,
444508 dec_off ,
445509 dec_buf ,
446510 dec_sz ,
447- dec_buf ,
511+ o_buf ,
448512 curr_key -> key .key ,
449513 & curr_key -> crypt_ctx );
450- }
514+ }
451515 }
452516 }
453517}
0 commit comments