226226TDEXLogSmgrInitWrite (bool encrypt_xlog )
227227{
228228 WalEncryptionKey * key = pg_tde_read_last_wal_key ();
229+ WALKeyCacheRec * keys ;
229230
230231 /*
231232 * Always generate a new key on starting PostgreSQL to protect against
@@ -246,6 +247,16 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
246247 TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
247248 }
248249
250+ keys = pg_tde_get_wal_cache_keys ();
251+
252+ if (keys == NULL )
253+ {
254+ WalLocation start = {.tli = 1 ,.lsn = 0 };
255+
256+ /* cache is empty, prefetch keys from disk */
257+ pg_tde_fetch_wal_keys (start );
258+ }
259+
249260 if (key )
250261 pfree (key );
251262}
@@ -263,6 +274,32 @@ TDEXLogSmgrInitWriteReuseKey()
263274 }
264275}
265276
277+ /*
278+ * Encrypt XLog page(s) from the buf and write to the segment file.
279+ */
280+ static ssize_t
281+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
282+ TimeLineID tli , XLogSegNo segno , int segSize )
283+ {
284+ char * enc_buff = EncryptionBuf ;
285+
286+ #ifndef FRONTEND
287+ Assert (count <= TDEXLogEncryptBuffSize ());
288+ #endif
289+
290+ /* Copy the data as-is, as we might have unencrypted parts */
291+ memcpy (enc_buff , buf , count );
292+
293+ /*
294+ * This method potentially allocates, but only in very early execution
295+ * Shouldn't happen in a write, where we are in a critical section
296+ */
297+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
298+
299+ return pg_pwrite (fd , enc_buff , count , offset );
300+ }
301+
302+
266303/*
267304 * Encrypt XLog page(s) from the buf and write to the segment file.
268305 */
@@ -284,6 +321,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
284321#endif
285322
286323 CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
324+
287325 pg_tde_stream_crypt (iv_prefix ,
288326 offset ,
289327 (char * ) buf ,
@@ -299,26 +337,64 @@ static ssize_t
299337tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
300338 TimeLineID tli , XLogSegNo segno , int segSize )
301339{
340+ bool lastKeyUsable ;
341+ bool afterWriteKey ;
342+ #ifdef FRONTEND
343+ bool crashRecovery = false;
344+ #else
345+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
346+ #endif
347+
348+ WalLocation loc = {.tli = tli };
349+ WalLocation writeKeyLoc ;
350+
351+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
352+
302353 /*
303354 * Set the last (most recent) key's start LSN if not set.
304355 *
305356 * This func called with WALWriteLock held, so no need in any extra sync.
306357 */
307- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
308- {
309- WalLocation loc = {.tli = tli };
310358
311- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
359+ writeKeyLoc .lsn = TDEXLogGetEncKeyLsn ();
360+ pg_read_barrier ();
361+ writeKeyLoc .tli = TDEXLogGetEncKeyTli ();
312362
313- pg_tde_wal_last_key_set_location (loc );
314- EncryptionKey .wal_start = loc ;
315- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
363+ lastKeyUsable = (writeKeyLoc .lsn != 0 );
364+ afterWriteKey = wal_location_cmp (writeKeyLoc , loc ) <= 0 ;
365+
366+ if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && !lastKeyUsable )
367+ {
368+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
369+
370+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED )
371+ {
372+ /*
373+ * TODO: the unencrypted case is still not perfect, we need to
374+ * report an error in some cornercases
375+ */
376+ if (last_key == NULL || last_key -> start .lsn < loc .lsn )
377+ {
378+ pg_tde_wal_last_key_set_location (loc );
379+ EncryptionKey .wal_start = loc ;
380+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
381+ lastKeyUsable = true;
382+ }
383+ }
316384 }
317385
318- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
386+ if ((!afterWriteKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
387+ {
388+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
389+ }
390+ else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
391+ {
319392 return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
393+ }
320394 else
395+ {
321396 return pg_pwrite (fd , buf , count , offset );
397+ }
322398}
323399
324400/*
@@ -340,7 +416,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
340416 if (readsz <= 0 )
341417 return readsz ;
342418
343- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
419+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
344420
345421 return readsz ;
346422}
@@ -349,15 +425,15 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
349425 * [De]Crypt buffer if needed based on provided segment offset, number and TLI
350426 */
351427void
352- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
428+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
353429 TimeLineID tli , XLogSegNo segno , int segSize )
354430{
355431 WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
356432 XLogRecPtr write_key_lsn ;
357433 WalLocation data_end = {.tli = tli };
358434 WalLocation data_start = {.tli = tli };
359435
360- if (! keys )
436+ if (keys == NULL )
361437 {
362438 WalLocation start = {.tli = 1 ,.lsn = 0 };
363439
@@ -454,6 +530,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
454530 XLogSegmentOffset (end_lsn , segSize );
455531 size_t dec_sz ;
456532 char * dec_buf = (char * ) buf + (dec_off - offset );
533+ char * o_buf = (char * ) out_buf + (dec_off - offset );
457534
458535 Assert (dec_off >= offset );
459536
@@ -468,17 +545,19 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
468545 dec_end = offset + count ;
469546 }
470547
548+ Assert (dec_end > dec_off );
471549 dec_sz = dec_end - dec_off ;
472550
473551#ifdef TDE_XLOG_DEBUG
474552 elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
475553 dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
476554#endif
555+
477556 pg_tde_stream_crypt (iv_prefix ,
478557 dec_off ,
479558 dec_buf ,
480559 dec_sz ,
481- dec_buf ,
560+ o_buf ,
482561 curr_key -> key .key ,
483562 & curr_key -> crypt_ctx );
484563 }
0 commit comments