Skip to content

Commit 1d8a654

Browse files
committed
[Issue #127] WAL retention
1 parent 51f4e38 commit 1d8a654

File tree

9 files changed

+1379
-39
lines changed

9 files changed

+1379
-39
lines changed

src/backup.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo)
183183
}
184184

185185
/* Obtain current timeline */
186-
current.tli = get_current_timeline(backup_conn);
187-
188186
#if PG_VERSION_NUM >= 90600
189187
current.tli = get_current_timeline(backup_conn);
190188
#else

src/catalog.c

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ timelineInfoNew(TimeLineID tli)
3636
tlinfo->switchpoint = InvalidXLogRecPtr;
3737
tlinfo->parent_link = NULL;
3838
tlinfo->xlog_filelist = parray_new();
39+
tlinfo->anchor_lsn = InvalidXLogRecPtr;
40+
tlinfo->anchor_tli = 0;
3941
return tlinfo;
4042
}
4143

@@ -746,6 +748,7 @@ catalog_get_timelines(InstanceConfig *instance)
746748
wal_file->file = *file;
747749
wal_file->segno = segno;
748750
wal_file->type = BACKUP_HISTORY_FILE;
751+
wal_file->keep = false;
749752
parray_append(tlinfo->xlog_filelist, wal_file);
750753
continue;
751754
}
@@ -765,6 +768,7 @@ catalog_get_timelines(InstanceConfig *instance)
765768
wal_file->file = *file;
766769
wal_file->segno = segno;
767770
wal_file->type = PARTIAL_SEGMENT;
771+
wal_file->keep = false;
768772
parray_append(tlinfo->xlog_filelist, wal_file);
769773
continue;
770774
}
@@ -826,6 +830,7 @@ catalog_get_timelines(InstanceConfig *instance)
826830
wal_file->file = *file;
827831
wal_file->segno = segno;
828832
wal_file->type = SEGMENT;
833+
wal_file->keep = false;
829834
parray_append(tlinfo->xlog_filelist, wal_file);
830835
}
831836
/* timeline history file */
@@ -895,6 +900,344 @@ catalog_get_timelines(InstanceConfig *instance)
895900
tlinfo->closest_backup = get_closest_backup(tlinfo);
896901
}
897902

903+
/* determine which WAL segments must be kept because of wal retention */
904+
if (instance->wal_depth <= 0)
905+
return timelineinfos;
906+
907+
/*
908+
* WAL retention for now is fairly simple.
909+
* User can set only one parameter - 'wal-depth'.
910+
* It determines how many latest valid(!) backups on timeline
911+
* must have an ability to perform PITR:
912+
* Consider the example:
913+
*
914+
* ---B1-------B2-------B3-------B4--------> WAL timeline1
915+
*
916+
* If 'wal-depth' is set to 2, then WAL purge should produce the following result:
917+
*
918+
* B1 B2 B3-------B4--------> WAL timeline1
919+
*
920+
* Only valid backup can satisfy 'wal-depth' condition, so if B3 is not OK or DONE,
921+
* then WAL purge should produce the following result:
922+
* B1 B2-------B3-------B4--------> WAL timeline1
923+
*
924+
* Complicated cases, such as branched timelines are taken into account.
925+
* wal-depth is applied to each timeline independently:
926+
*
927+
* |---------> WAL timeline2
928+
* ---B1---|---B2-------B3-------B4--------> WAL timeline1
929+
*
930+
* after WAL purge with wal-depth=2:
931+
*
932+
* |---------> WAL timeline2
933+
* B1---| B2 B3-------B4--------> WAL timeline1
934+
*
935+
* In this example WAL retention prevents purge of WAL required by tli2
936+
* to stay reachable from backup B1 on tli1.
937+
*
938+
* To protect WAL from purge we try to set 'anchor_lsn' and 'anchor_tli' in every timeline.
939+
* They usually come from the 'start-lsn' and 'tli' attributes of the backup
940+
* calculated by 'wal-depth' parameter.
941+
* With 'wal-depth=2' anchor_backup in tli1 is B3.
942+
943+
* If timeline has not enough valid backups to satisfy 'wal-depth' condition,
944+
* then 'anchor_lsn' and 'anchor_tli' are taken from the 'start-lsn' and 'tli'
945+
* attribute of closest_backup.
946+
* The interval of WAL starting from closest_backup to switchpoint is
947+
* saved into 'keep_segments' attribute.
948+
* If there are several intermediate timelines between timeline and its closest_backup
949+
* then on every intermediate timeline WAL interval between switchpoint
950+
* and starting segment is placed in 'keep_segments' attributes:
951+
*
952+
* |---------> WAL timeline3
953+
* |------| B5-----B6--> WAL timeline2
954+
* B1---| B2 B3-------B4------------> WAL timeline1
955+
*
956+
* On timeline where closest_backup is located the WAL interval between
957+
* closest_backup and switchpoint is placed into 'keep_segments'.
958+
* If timeline has no 'closest_backup', then 'wal-depth' rules cannot be applied
959+
* to this timeline and its WAL must be purged by following the basic rules of WAL purging.
960+
*
961+
* Third part is handling of ARCHIVE backups.
962+
* If B1 and B2 have ARCHIVE wal-mode, then we must preserve WAL intervals
963+
* between start_lsn and stop_lsn for each of them in 'keep_segments'.
964+
*/
965+
966+
/* determine anchor_lsn and keep_segments for every timeline */
967+
for (int i = 0; i < parray_num(timelineinfos); i++)
968+
{
969+
int count = 0;
970+
timelineInfo *tlinfo = parray_get(timelineinfos, i);
971+
972+
/*
973+
* Iterate backward on backups belonging to this timeline to find
974+
* anchor_backup. NOTE Here we rely on the fact that backups list
975+
* is ordered by start_lsn DESC.
976+
*/
977+
if (tlinfo->backups)
978+
{
979+
for (int j = 0; j < parray_num(tlinfo->backups); j++)
980+
{
981+
pgBackup *backup = parray_get(tlinfo->backups, j);
982+
983+
/* skip invalid backups */
984+
if (backup->status != BACKUP_STATUS_OK &&
985+
backup->status != BACKUP_STATUS_DONE)
986+
continue;
987+
988+
/* sanity */
989+
if (XLogRecPtrIsInvalid(backup->start_lsn) ||
990+
backup->tli <= 0)
991+
continue;
992+
993+
count++;
994+
995+
if (count == instance->wal_depth)
996+
{
997+
elog(LOG, "On timeline %i WAL is protected from purge at %X/%X",
998+
tlinfo->tli,
999+
(uint32) (backup->start_lsn >> 32),
1000+
(uint32) (backup->start_lsn));
1001+
1002+
tlinfo->anchor_lsn = backup->start_lsn;
1003+
tlinfo->anchor_tli = backup->tli;
1004+
break;
1005+
}
1006+
}
1007+
}
1008+
1009+
/*
1010+
* Failed to find anchor backup for this timeline.
1011+
* We cannot just throw it to the wolves, because by
1012+
* doing that we will violate our own guarantees.
1013+
* So check the existence of closest_backup for
1014+
* this timeline. If there is one, then
1015+
* set the 'anchor_lsn' and 'anchor_tli' to closest_backup
1016+
* 'start-lsn' and 'tli' respectively.
1017+
* |-------------B5----------> WAL timeline3
1018+
* |-----|-------------------------> WAL timeline2
1019+
* B1 B2---| B3 B4-------B6-----> WAL timeline1
1020+
*
1021+
* wal-depth=2
1022+
*
1023+
* If number of valid backups on timelines is less than 'wal-depth'
1024+
* then timeline must(!) stay reachable via parent timelines if any.
1025+
* If closest_backup is not available, then general WAL purge rules
1026+
* are applied.
1027+
*/
1028+
if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn))
1029+
{
1030+
/*
1031+
* Failed to find anchor_lsn in our own timeline.
1032+
* Consider the case:
1033+
* -------------------------------------> tli5
1034+
* ----------------------------B4-------> tli4
1035+
* S2`--------------> tli3
1035+
* S1`------------S2---B3-------B6-> tli2
1037+
* B1---S1-------------B2--------B5-----> tli1
1038+
*
1039+
* B* - backups
1040+
* S* - switchpoints
1041+
* wal-depth=2
1042+
*
1043+
* Expected result:
1044+
* TLI5 will be purged entirely
1045+
* B4-------> tli4
1046+
* S2`--------------> tli3
1047+
* S1`------------S2 B3-------B6-> tli2
1048+
* B1---S1 B2--------B5-----> tli1
1049+
*/
1050+
pgBackup *closest_backup = NULL;
1051+
xlogInterval *interval = NULL;
1052+
TimeLineID tli = 0;
1053+
/* check if tli has closest_backup */
1054+
if (!tlinfo->closest_backup)
1055+
/* timeline has no closest_backup, wal retention cannot be
1056+
* applied to this timeline.
1057+
* Timeline will be purged up to oldest_backup if any or
1058+
* purged entirely if there is none.
1059+
* In example above: tli5 and tli4.
1060+
*/
1061+
continue;
1062+
1063+
/* sanity for closest_backup */
1064+
if (XLogRecPtrIsInvalid(tlinfo->closest_backup->start_lsn) ||
1065+
tlinfo->closest_backup->tli <= 0)
1066+
continue;
1067+
1068+
/*
1069+
* Set anchor_lsn and anchor_tli to protect whole timeline from purge
1070+
* In the example above: tli3.
1071+
*/
1072+
tlinfo->anchor_lsn = tlinfo->closest_backup->start_lsn;
1073+
tlinfo->anchor_tli = tlinfo->closest_backup->tli;
1074+
1075+
/* closest backup may be located not in parent timeline */
1076+
closest_backup = tlinfo->closest_backup;
1077+
1078+
tli = tlinfo->tli;
1079+
1080+
/*
1081+
* Iterate over parent timeline chain and
1082+
* look for timeline where closest_backup belong
1083+
*/
1084+
while (tlinfo->parent_link)
1085+
{
1086+
/* In case of intermediate timeline save to keep_segments
1087+
* begin_segno and switchpoint segment.
1088+
* In case of final timelines save to keep_segments
1089+
* closest_backup start_lsn segment and switchpoint segment.
1090+
*/
1091+
XLogRecPtr switchpoint = tlinfo->switchpoint;
1092+
1093+
tlinfo = tlinfo->parent_link;
1094+
1095+
if (tlinfo->keep_segments == NULL)
1096+
tlinfo->keep_segments = parray_new();
1097+
1098+
/* in any case, switchpoint segment must be added to interval */
1099+
interval = palloc(sizeof(xlogInterval));
1100+
GetXLogSegNo(switchpoint, interval->end_segno, instance->xlog_seg_size);
1101+
1102+
/* Save [S1`, S2] to keep_segments */
1103+
if (tlinfo->tli != closest_backup->tli)
1104+
interval->begin_segno = tlinfo->begin_segno;
1105+
/* Save [B1, S1] to keep_segments */
1106+
else
1107+
GetXLogSegNo(closest_backup->start_lsn, interval->begin_segno, instance->xlog_seg_size);
1108+
1109+
/*
1110+
* TODO: check, maybe this interval is already here or
1111+
* covered by other larger interval.
1112+
*/
1113+
1114+
elog(LOG, "Timeline %i to stay reachable from timeline %i "
1115+
"protect from purge WAL interval between "
1116+
"%08X%08X and %08X%08X on timeline %i",
1117+
tli, closest_backup->tli,
1118+
(uint32) interval->begin_segno / instance->xlog_seg_size,
1119+
(uint32) interval->begin_segno % instance->xlog_seg_size,
1120+
(uint32) interval->end_segno / instance->xlog_seg_size,
1121+
(uint32) interval->end_segno % instance->xlog_seg_size,
1122+
tlinfo->tli);
1123+
parray_append(tlinfo->keep_segments, interval);
1124+
continue;
1125+
}
1126+
continue;
1127+
}
1128+
1129+
/* Iterate over backups left */
1130+
for (int j = count; j < parray_num(tlinfo->backups); j++)
1131+
{
1132+
XLogSegNo segno = 0;
1133+
xlogInterval *interval = NULL;
1134+
pgBackup *backup = parray_get(tlinfo->backups, j);
1135+
1136+
/*
1137+
* We must calculate keep_segments intervals for ARCHIVE backups
1138+
* with start_lsn less than anchor_lsn.
1139+
*/
1140+
1141+
/* STREAM backups cannot contribute to keep_segments */
1142+
if (backup->stream)
1143+
continue;
1144+
1145+
/* sanity */
1146+
if (XLogRecPtrIsInvalid(backup->start_lsn) ||
1147+
backup->tli <= 0)
1148+
continue;
1149+
1150+
/* no point in clogging keep_segments by backups protected by anchor_lsn */
1151+
if (backup->start_lsn >= tlinfo->anchor_lsn)
1152+
continue;
1153+
1154+
/* append interval to keep_segments */
1155+
interval = palloc(sizeof(xlogInterval));
1156+
GetXLogSegNo(backup->start_lsn, segno, instance->xlog_seg_size);
1157+
interval->begin_segno = segno;
1158+
GetXLogSegNo(backup->stop_lsn, segno, instance->xlog_seg_size);
1159+
1160+
/*
1161+
* On replica it is possible to get STOP_LSN pointing to contrecord,
1162+
* so set end_segno to the next segment after STOP_LSN just to be safe.
1163+
*/
1164+
if (backup->from_replica)
1165+
interval->end_segno = segno + 1;
1166+
else
1167+
interval->end_segno = segno;
1168+
1169+
elog(LOG, "Archive backup %s to stay consistent "
1170+
"protect from purge WAL interval "
1171+
"between %08X%08X and %08X%08X on timeline %i",
1172+
base36enc(backup->start_time),
1173+
(uint32) interval->begin_segno / instance->xlog_seg_size,
1174+
(uint32) interval->begin_segno % instance->xlog_seg_size,
1175+
(uint32) interval->end_segno / instance->xlog_seg_size,
1176+
(uint32) interval->end_segno % instance->xlog_seg_size,
1177+
backup->tli);
1178+
1179+
if (tlinfo->keep_segments == NULL)
1180+
tlinfo->keep_segments = parray_new();
1181+
1182+
parray_append(tlinfo->keep_segments, interval);
1183+
}
1184+
}
1185+
1186+
/*
1187+
* Protect WAL segments from deletion by setting 'keep' flag.
1188+
* We must keep all WAL segments after anchor_lsn (including), and also segments
1189+
* required by ARCHIVE backups for consistency - WAL between [start_lsn, stop_lsn].
1190+
*/
1191+
for (int i = 0; i < parray_num(timelineinfos); i++)
1192+
{
1193+
XLogSegNo anchor_segno = 0;
1194+
timelineInfo *tlinfo = parray_get(timelineinfos, i);
1195+
1196+
/*
1197+
* At this point invalid anchor_lsn can be only in one case:
1198+
* timeline is going to be purged by regular WAL purge rules.
1199+
*/
1200+
if (XLogRecPtrIsInvalid(tlinfo->anchor_lsn))
1201+
continue;
1202+
1203+
/*
1204+
* anchor_lsn is located in another timeline, it means that the timeline
1205+
* will be protected from purge entirely.
1206+
*/
1207+
if (tlinfo->anchor_tli > 0 && tlinfo->anchor_tli != tlinfo->tli)
1208+
continue;
1209+
1210+
GetXLogSegNo(tlinfo->anchor_lsn, anchor_segno, instance->xlog_seg_size);
1211+
1212+
for (int i = 0; i < parray_num(tlinfo->xlog_filelist); i++)
1213+
{
1214+
xlogFile *wal_file = (xlogFile *) parray_get(tlinfo->xlog_filelist, i);
1215+
1216+
if (wal_file->segno >= anchor_segno)
1217+
{
1218+
wal_file->keep = true;
1219+
continue;
1220+
}
1221+
1222+
/* no keep segments */
1223+
if (!tlinfo->keep_segments)
1224+
continue;
1225+
1226+
/* Protect segments belonging to one of the keep intervals */
1227+
for (int j = 0; j < parray_num(tlinfo->keep_segments); j++)
1228+
{
1229+
xlogInterval *keep_segments = (xlogInterval *) parray_get(tlinfo->keep_segments, j);
1230+
1231+
if ((wal_file->segno >= keep_segments->begin_segno) &&
1232+
wal_file->segno <= keep_segments->end_segno)
1233+
{
1234+
wal_file->keep = true;
1235+
break;
1236+
}
1237+
}
1238+
}
1239+
}
1240+
8981241
return timelineinfos;
8991242
}
9001243

0 commit comments

Comments
 (0)