15
15
#include <unistd.h>
16
16
17
17
static void delete_walfiles (XLogRecPtr oldest_lsn , TimeLineID oldest_tli ,
18
- uint32 xlog_seg_size );
18
+ uint32 xlog_seg_size );
19
+ static void delete_walfiles_internal (XLogRecPtr keep_lsn , timelineInfo * tli ,
20
+ uint32 xlog_seg_size , bool dry_run );
19
21
static void do_retention_internal (parray * backup_list , parray * to_keep_list ,
20
22
parray * to_purge_list );
21
23
static void do_retention_merge (parray * backup_list , parray * to_keep_list ,
22
24
parray * to_purge_list );
23
25
static void do_retention_purge (parray * to_keep_list , parray * to_purge_list );
24
- static void do_retention_wal (void );
26
+ static void do_retention_wal (bool dry_run );
25
27
28
+ // TODO: more useful messages for dry run.
26
29
static bool backup_deleted = false; /* At least one backup was deleted */
27
30
static bool backup_merged = false; /* At least one merge was enacted */
31
+ static bool wal_deleted = false; /* At least one WAL segment was deleted */
28
32
29
33
void
30
34
do_delete (time_t backup_id )
@@ -33,8 +37,8 @@ do_delete(time_t backup_id)
33
37
parray * backup_list ,
34
38
* delete_list ;
35
39
pgBackup * target_backup = NULL ;
36
- XLogRecPtr oldest_lsn = InvalidXLogRecPtr ;
37
- TimeLineID oldest_tli = 0 ;
40
+ // XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
41
+ // TimeLineID oldest_tli = 0;
38
42
39
43
/* Get complete list of backups */
40
44
backup_list = catalog_get_backup_list (instance_name , INVALID_BACKUP_ID );
@@ -86,24 +90,7 @@ do_delete(time_t backup_id)
86
90
87
91
/* Clean WAL segments */
88
92
if (delete_wal )
89
- {
90
- Assert (target_backup );
91
-
92
- /* Find oldest LSN, used by backups */
93
- for (i = (int ) parray_num (backup_list ) - 1 ; i >= 0 ; i -- )
94
- {
95
- pgBackup * backup = (pgBackup * ) parray_get (backup_list , (size_t ) i );
96
-
97
- if (backup -> status == BACKUP_STATUS_OK || backup -> status == BACKUP_STATUS_DONE )
98
- {
99
- oldest_lsn = backup -> start_lsn ;
100
- oldest_tli = backup -> tli ;
101
- break ;
102
- }
103
- }
104
-
105
- delete_walfiles (oldest_lsn , oldest_tli , instance_config .xlog_seg_size );
106
- }
93
+ do_retention_wal (false);
107
94
108
95
/* cleanup */
109
96
parray_walk (backup_list , pgBackupFree );
@@ -172,8 +159,8 @@ int do_retention(void)
172
159
do_retention_purge (to_keep_list , to_purge_list );
173
160
174
161
/* TODO: some sort of dry run for delete_wal */
175
- if (delete_wal && ! dry_run )
176
- do_retention_wal ();
162
+ if (delete_wal )
163
+ do_retention_wal (dry_run );
177
164
178
165
/* TODO: consider dry-run flag */
179
166
@@ -622,47 +609,44 @@ do_retention_purge(parray *to_keep_list, parray *to_purge_list)
622
609
}
623
610
}
624
611
625
- /* Purge WAL */
612
+ /* Purge WAL
613
+ * Iterate over timelines
614
+ * Look for closest_backup, if exists, goto next timelime
615
+ * if not exists, look for oldest backup on timeline
616
+ */
626
617
static void
627
- do_retention_wal (void )
618
+ do_retention_wal (bool dry_run )
628
619
{
629
- parray * backup_list = NULL ;
630
-
631
- XLogRecPtr oldest_lsn = InvalidXLogRecPtr ;
632
- TimeLineID oldest_tli = 0 ;
633
- bool backup_list_is_empty = false;
620
+ parray * tli_list ;
634
621
int i ;
635
622
636
- /* Get list of backups. */
637
- backup_list = catalog_get_backup_list (instance_name , INVALID_BACKUP_ID );
623
+ tli_list = catalog_get_timelines (& instance_config );
638
624
639
- if (parray_num (backup_list ) == 0 )
640
- backup_list_is_empty = true;
641
-
642
- /* Save LSN and Timeline to remove unnecessary WAL segments */
643
- for (i = (int ) parray_num (backup_list ) - 1 ; i >= 0 ; i -- )
625
+ for (i = 0 ; i < parray_num (tli_list ); i ++ )
644
626
{
645
- pgBackup * backup = (pgBackup * ) parray_get (backup_list , i );
646
-
647
- /* Get LSN and TLI of the oldest backup with valid start_lsn and tli */
648
- if (backup -> tli > 0 && !XLogRecPtrIsInvalid (backup -> start_lsn ))
649
- {
650
- oldest_tli = backup -> tli ;
651
- oldest_lsn = backup -> start_lsn ;
652
- break ;
653
- }
654
- }
627
+ timelineInfo * tlinfo = (timelineInfo * ) parray_get (tli_list , i );
655
628
656
- /* Be paranoid */
657
- if (!backup_list_is_empty && XLogRecPtrIsInvalid (oldest_lsn ))
658
- elog (ERROR , "Not going to purge WAL because LSN is invalid" );
629
+ /* Empty timeline can be safely skipped */
630
+ if (tlinfo -> n_xlog_files == 0 &&
631
+ parray_num (tlinfo -> xlog_filelist ) == 0 )
632
+ continue ;
659
633
660
- /* Purge WAL files */
661
- delete_walfiles (oldest_lsn , oldest_tli , instance_config .xlog_seg_size );
634
+ /* If closest backup is exists, then timeline can be safely skipped */
635
+ if (tlinfo -> closest_backup )
636
+ continue ;
662
637
663
- /* Cleanup */
664
- parray_walk (backup_list , pgBackupFree );
665
- parray_free (backup_list );
638
+ /*
639
+ * Purge all WAL segments before START LSN of oldest backup.
640
+ * If there is no backups on timeline, then whole timeline
641
+ * can be safely purged.
642
+ */
643
+ if (tlinfo -> oldest_backup )
644
+ delete_walfiles_internal (tlinfo -> oldest_backup -> start_lsn ,
645
+ tlinfo , instance_config .xlog_seg_size , dry_run );
646
+ else
647
+ delete_walfiles_internal (InvalidXLogRecPtr ,
648
+ tlinfo , instance_config .xlog_seg_size , dry_run );
649
+ }
666
650
}
667
651
668
652
/*
@@ -728,6 +712,158 @@ delete_backup_files(pgBackup *backup)
728
712
return ;
729
713
}
730
714
715
+ /* Purge WAL archive.
716
+ * If 'keep_lsn' is InvalidXLogRecPtr, then whole timeline can be purged
717
+ * If 'keep_lsn' is valid LSN, then every lesser segment can be purged.
718
+ * If 'dry_run' is set, then don`t actually delete anything.
719
+ *
720
+ * Case 1:
721
+ * archive is not empty, 'keep_lsn' is valid and we can delete something.
722
+ * Case 2:
723
+ * archive is not empty, 'keep_lsn' is valid and prevening us from deleting anything.
724
+ * Case 3:
725
+ * archive is not empty, 'keep_lsn' is invalid, drop everyhing in archive.
726
+ * Case 4:
727
+ * archive is empty, 'keep_lsn' is valid, assume corruption of WAL archive.
728
+ * Case 5:
729
+ * archive is empty, 'keep_lsn' is invalid, drop backup history files
730
+ * and partial WAL segments in archive.
731
+ *
732
+ * Q: Maybe we should stop treating partial WAL segments as second-class citizens?
733
+ */
734
+ static void
735
+ delete_walfiles_internal (XLogRecPtr keep_lsn , timelineInfo * tlinfo ,
736
+ uint32 xlog_seg_size , bool dry_run )
737
+ {
738
+ XLogSegNo StartSegNo ; /* First segment to delete */
739
+ XLogSegNo EndSegNo = 0 ; /* Oldest segment to keep */
740
+ int rc ;
741
+ int i ;
742
+ int wal_size_logical = 0 ;
743
+ int wal_size_actual = 0 ;
744
+ char wal_pretty_size [20 ];
745
+ bool purge_all = false;
746
+
747
+ /* Timeline is completely empty */
748
+ if (parray_num (tlinfo -> xlog_filelist ) == 0 )
749
+ {
750
+ elog (INFO , "Timeline %i is empty, nothing to remove" , tlinfo -> tli );
751
+ return ;
752
+ }
753
+
754
+ if (XLogRecPtrIsInvalid (keep_lsn ))
755
+ {
756
+ /* Drop all segments in timeline */
757
+ elog (INFO , "All files on timeline %i will be removed" , tlinfo -> tli );
758
+ StartSegNo = tlinfo -> begin_segno ;
759
+ EndSegNo = tlinfo -> end_segno ;
760
+ purge_all = true;
761
+ }
762
+ else
763
+ {
764
+ /* Drop all segments between begin_segno and segment with keep_lsn (excluding) */
765
+ StartSegNo = tlinfo -> begin_segno ;
766
+ GetXLogSegNo (keep_lsn , EndSegNo , xlog_seg_size );
767
+ }
768
+
769
+ if (EndSegNo > 0 && EndSegNo > StartSegNo )
770
+ elog (INFO , "WAL segments between %08X%08X and %08X%08X on timeline %i will be removed" ,
771
+ (uint32 ) StartSegNo / xlog_seg_size , (uint32 ) StartSegNo % xlog_seg_size ,
772
+ (uint32 ) EndSegNo / xlog_seg_size , (uint32 ) EndSegNo % xlog_seg_size ,
773
+ tlinfo -> tli );
774
+
775
+ if (EndSegNo > StartSegNo )
776
+ /* typical scenario */
777
+ wal_size_logical = (EndSegNo - StartSegNo ) * xlog_seg_size ;
778
+ else if (EndSegNo < StartSegNo )
779
+ {
780
+ /* It is actually possible for EndSegNo to be less than StartSegNo
781
+ * in case of :
782
+ * 1. WAL archive corruption.
783
+ * 2. There is no actual WAL archive to speak of and
784
+ * 'keep_lsn' is coming from STREAM backup.
785
+ *
786
+ * Assume the worst.
787
+ */
788
+ if (StartSegNo > 0 && EndSegNo > 0 )
789
+ elog (WARNING , "On timeline %i first segment %08X%08X is greater than "
790
+ "oldest segment to keep %08X%08X. Possible WAL archive corruption." ,
791
+ tlinfo -> tli ,
792
+ (uint32 ) StartSegNo / xlog_seg_size , (uint32 ) StartSegNo % xlog_seg_size ,
793
+ (uint32 ) EndSegNo / xlog_seg_size , (uint32 ) EndSegNo % xlog_seg_size );
794
+ }
795
+ else if (EndSegNo == StartSegNo && !purge_all )
796
+ {
797
+ /* 'Nothing to delete' scenario because of 'keep_lsn'
798
+ * with possible exception of partial and backup history files.
799
+ */
800
+ elog (INFO , "Nothing to remove on timeline %i" , tlinfo -> tli );
801
+ }
802
+
803
+ /* Report the logical size to delete */
804
+ if (wal_size_logical > 0 )
805
+ {
806
+ pretty_size (wal_size_logical , wal_pretty_size , lengthof (wal_pretty_size ));
807
+ elog (INFO , "WAL size to remove on timeline %i: %s" ,
808
+ tlinfo -> tli , wal_pretty_size );
809
+ }
810
+
811
+ /* Calculate the actual size to delete */
812
+ for (i = 0 ; i < parray_num (tlinfo -> xlog_filelist ); i ++ )
813
+ {
814
+ xlogFile * wal_file = (xlogFile * ) parray_get (tlinfo -> xlog_filelist , i );
815
+
816
+ if (purge_all || wal_file -> segno < EndSegNo )
817
+ wal_size_actual += wal_file -> file .size ;
818
+ }
819
+
820
+ /* Report the actual size to delete */
821
+ if (wal_size_actual > 0 )
822
+ {
823
+ pretty_size (wal_size_actual , wal_pretty_size , lengthof (wal_pretty_size ));
824
+ elog (INFO , "Resident data size to free on timeline %i: %s" ,
825
+ tlinfo -> tli , wal_pretty_size );
826
+ }
827
+
828
+ if (dry_run )
829
+ return ;
830
+
831
+ for (i = 0 ; i < parray_num (tlinfo -> xlog_filelist ); i ++ )
832
+ {
833
+ xlogFile * wal_file = (xlogFile * ) parray_get (tlinfo -> xlog_filelist , i );
834
+
835
+ if (interrupted )
836
+ elog (ERROR , "interrupted during WAL archive purge" );
837
+
838
+ /* Any segment equal or greater than EndSegNo must be kept
839
+ * unless it`s a 'purge all' scenario.
840
+ */
841
+ if (purge_all || wal_file -> segno < EndSegNo )
842
+ {
843
+ /* unlink segment */
844
+ rc = unlink (wal_file -> file .path );
845
+ if (rc < 0 )
846
+ {
847
+ /* Missing file is not considered as error condition */
848
+ if (errno != ENOENT )
849
+ elog (ERROR , "Could not remove file \"%s\": %s" ,
850
+ wal_file -> file .path , strerror (errno ));
851
+ }
852
+ else
853
+ {
854
+ if (wal_file -> type == SEGMENT )
855
+ elog (VERBOSE , "Removed WAL segment \"%s\"" , wal_file -> file .path );
856
+ else if (wal_file -> type == PARTIAL_SEGMENT )
857
+ elog (VERBOSE , "Removed partial WAL segment \"%s\"" , wal_file -> file .path );
858
+ else if (wal_file -> type == BACKUP_HISTORY_FILE )
859
+ elog (VERBOSE , "Removed backup history file \"%s\"" , wal_file -> file .path );
860
+ }
861
+
862
+ wal_deleted = true;
863
+ }
864
+ }
865
+ }
866
+
731
867
/*
732
868
* Deletes WAL segments up to oldest_lsn or all WAL segments (if all backups
733
869
* was deleted and so oldest_lsn is invalid).
@@ -739,7 +875,7 @@ delete_backup_files(pgBackup *backup)
739
875
*/
740
876
static void
741
877
delete_walfiles (XLogRecPtr oldest_lsn , TimeLineID oldest_tli ,
742
- uint32 xlog_seg_size )
878
+ uint32 xlog_seg_size )
743
879
{
744
880
XLogSegNo targetSegNo ;
745
881
char oldestSegmentNeeded [MAXFNAMELEN ];
0 commit comments