228228TDEXLogSmgrInitWrite (bool encrypt_xlog )
229229{
230230 WalEncryptionKey * key = pg_tde_read_last_wal_key ();
231+ WalLocation start = {.tli = 1 ,.lsn = 0 };
232+ WALKeyCacheRec * keys ;
231233
232234 /*
233235 * Always generate a new key on starting PostgreSQL to protect against
@@ -248,6 +250,14 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
248250 TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
249251 }
250252
253+ keys = pg_tde_get_wal_cache_keys ();
254+
255+ if (keys == NULL )
256+ {
257+ /* TODO cache is empty, try to preread keys from disk */
258+ keys = pg_tde_fetch_wal_keys (start );
259+ }
260+
251261 if (key )
252262 pfree (key );
253263}
@@ -265,6 +275,30 @@ TDEXLogSmgrInitWriteReuseKey()
265275 }
266276}
267277
278+ /*
279+ * Encrypt XLog page(s) from the buf and write to the segment file.
280+ */
281+ static ssize_t
282+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
283+ TimeLineID tli , XLogSegNo segno , int segSize )
284+ {
285+ char * enc_buff = EncryptionBuf ;
286+
287+ #ifndef FRONTEND
288+ Assert (count <= TDEXLogEncryptBuffSize ());
289+ #endif
290+
291+ /* Copy the data as-is, as we might have unencrypted parts */
292+ memcpy (enc_buff , buf , count );
293+
294+ /* This method potentially allocates, but only in very early execution
295+ Shouldn't happen in a write, where we are in a critical section */
296+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
297+
298+ return pg_pwrite (fd , enc_buff , count , offset );
299+ }
300+
301+
268302/*
269303 * Encrypt XLog page(s) from the buf and write to the segment file.
270304 */
@@ -286,6 +320,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
286320#endif
287321
288322 CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
323+
289324 pg_tde_stream_crypt (iv_prefix ,
290325 offset ,
291326 (char * ) buf ,
@@ -301,26 +336,49 @@ static ssize_t
301336tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
302337 TimeLineID tli , XLogSegNo segno , int segSize )
303338{
339+ bool lastKeyUsable ;
340+ bool afterLastKey ;
341+ #ifdef FRONTEND
342+ bool crashRecovery = false;
343+ #else
344+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
345+ #endif
346+
347+ WalLocation loc = {.tli = tli };
348+
349+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
350+
304351 /*
305352 * Set the last (most recent) key's start LSN if not set.
306353 *
307354 * This func called with WALWriteLock held, so no need in any extra sync.
308355 */
309- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
310- {
311- WalLocation loc = {.tli = tli };
312356
313- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
357+ lastKeyUsable = (TDEXLogGetEncKeyLsn () != 0 );
358+ afterLastKey = (TDEXLogGetEncKeyLsn () <= loc .lsn );
314359
315- pg_tde_wal_last_key_set_location (loc );
316- EncryptionKey .wal_start = loc ;
317- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
360+ if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && !lastKeyUsable )
361+ {
362+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
363+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED ) {
364+ /* TODO: the unencrypted case is still not perfect, we need to report an error in some cornercases */
365+ if (last_key == NULL || last_key -> start .lsn < loc .lsn ) {
366+ pg_tde_wal_last_key_set_location (loc );
367+ EncryptionKey .wal_start = loc ;
368+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
369+ lastKeyUsable = true;
370+ }
371+ }
318372 }
319373
320- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
321- return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
322- else
374+ if ((!afterLastKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED ) {
375+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
376+ } else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED ) {
377+ return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
378+ }
379+ else {
323380 return pg_pwrite (fd , buf , count , offset );
381+ }
324382}
325383
326384/*
@@ -342,7 +400,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
342400 if (readsz <= 0 )
343401 return readsz ;
344402
345- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
403+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
346404
347405 return readsz ;
348406}
@@ -351,20 +409,22 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
351409 * [De]Crypt buffer if needed based on provided segment offset, number and TLI
352410 */
353411void
354- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
412+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
355413 TimeLineID tli , XLogSegNo segno , int segSize )
356414{
357415 WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
358416 XLogRecPtr write_key_lsn ;
359417 WalLocation data_end = {.tli = tli };
360418 WalLocation data_start = {.tli = tli };
361419
362- if (! keys )
420+ if (keys == NULL )
363421 {
364422 WalLocation start = {.tli = 1 ,.lsn = 0 };
365423
366424 /* cache is empty, try to read keys from disk */
367- keys = pg_tde_fetch_wal_keys (start );
425+ pg_tde_fetch_wal_keys (start );
426+
427+ keys = pg_tde_get_wal_cache_keys ();
368428 }
369429
370430 /*
@@ -423,6 +483,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
423483 off_t dec_end = XLogSegmentOffset (minlsn , segSize );
424484 size_t dec_sz ;
425485 char * dec_buf = (char * ) buf + (dec_off - offset );
486+ char * o_buf = (char * ) out_buf + (dec_off - offset );
426487
427488 Assert (dec_off >= offset );
428489
@@ -434,20 +495,26 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
434495 dec_end = offset + count ;
435496 }
436497
437- dec_sz = dec_end - dec_off ;
498+ if (dec_end > dec_off ) {
499+ dec_sz = dec_end - dec_off ;
500+ } else {
501+ // assert?
502+ dec_sz = 0 ;
503+ }
438504
439505#ifdef TDE_XLOG_DEBUG
440506 elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
441507 dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
442508#endif
509+
443510 pg_tde_stream_crypt (iv_prefix ,
444511 dec_off ,
445512 dec_buf ,
446513 dec_sz ,
447- dec_buf ,
514+ o_buf ,
448515 curr_key -> key .key ,
449516 & curr_key -> crypt_ctx );
450- }
517+ }
451518 }
452519 }
453520}
0 commit comments