@@ -108,8 +108,8 @@ static int checkpoint_timeout(void);
108108
109109//static void backup_list_file(parray *files, const char *root, )
110110static void parse_backup_filelist_filenames (parray * files , const char * root );
111- static void wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn ,
112- bool wait_prev_segment );
111+ static XLogRecPtr wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn ,
112+ bool wait_prev_segment );
113113static void wait_replica_wal_lsn (XLogRecPtr lsn , bool is_start_backup );
114114static void make_pagemap_from_ptrack (parray * files );
115115static void * StreamLog (void * arg );
@@ -1184,7 +1184,7 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
11841184
11851185 if (current .backup_mode == BACKUP_MODE_DIFF_PAGE )
11861186 /* In PAGE mode wait for current segment... */
1187- wait_wal_lsn (backup -> start_lsn , true, false);
1187+ wait_wal_lsn (backup -> start_lsn , true, false);
11881188 /*
11891189 * Do not wait start_lsn for stream backup.
11901190 * Because WAL streaming will start after pg_start_backup() in stream
@@ -1524,8 +1524,11 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode,
15241524 * be archived in archive 'wal' directory regardless of stream mode.
15251525 *
15261526 * If 'wait_prev_segment' is true, wait for the previous segment.
1527+ *
1528+ * Returns LSN of last valid record if wait_prev_segment is not true, otherwise
1529+ * returns InvalidXLogRecPtr.
15271530 */
1528- static void
1531+ static XLogRecPtr
15291532wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , bool wait_prev_segment )
15301533{
15311534 TimeLineID tli ;
@@ -1565,25 +1568,22 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15651568 DATABASE_DIR , PG_XLOG_DIR );
15661569 join_path_components (wal_segment_path , pg_wal_dir , wal_segment );
15671570 wal_segment_dir = pg_wal_dir ;
1568-
1569- timeout = (uint32 ) checkpoint_timeout ();
1570- timeout = timeout + timeout * 0.1 ;
15711571 }
15721572 else
15731573 {
15741574 join_path_components (wal_segment_path , arclog_path , wal_segment );
15751575 wal_segment_dir = arclog_path ;
1576-
1577- if (instance_config .archive_timeout > 0 )
1578- timeout = instance_config .archive_timeout ;
1579- else
1580- timeout = ARCHIVE_TIMEOUT_DEFAULT ;
15811576 }
15821577
1578+ if (instance_config .archive_timeout > 0 )
1579+ timeout = instance_config .archive_timeout ;
1580+ else
1581+ timeout = ARCHIVE_TIMEOUT_DEFAULT ;
1582+
15831583 if (wait_prev_segment )
15841584 elog (LOG , "Looking for segment: %s" , wal_segment );
15851585 else
1586- elog (LOG , "Looking for LSN: %X/%X in segment: %s" ,
1586+ elog (LOG , "Looking for LSN %X/%X in segment: %s" ,
15871587 (uint32 ) (lsn >> 32 ), (uint32 ) lsn , wal_segment );
15881588
15891589#ifdef HAVE_LIBZ
@@ -1615,7 +1615,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
16151615 {
16161616 /* Do not check LSN for previous WAL segment */
16171617 if (wait_prev_segment )
1618- return ;
1618+ return InvalidXLogRecPtr ;
16191619
16201620 /*
16211621 * A WAL segment found. Check LSN on it.
@@ -1625,7 +1625,29 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
16251625 /* Target LSN was found */
16261626 {
16271627 elog (LOG , "Found LSN: %X/%X" , (uint32 ) (lsn >> 32 ), (uint32 ) lsn );
1628- return ;
1628+ return lsn ;
1629+ }
1630+
1631+ /*
1632+ * If we failed to get LSN of valid record in a reasonable time, try
1633+ * to get LSN of last valid record prior to the target LSN. But only
1634+ * in case of a backup from a replica.
1635+ */
1636+ if (!exclusive_backup && current .from_replica &&
1637+ (try_count > timeout / 4 ))
1638+ {
1639+ XLogRecPtr res ;
1640+
1641+ res = get_last_wal_lsn (wal_segment_dir , current .start_lsn ,
1642+ lsn , tli , false,
1643+ instance_config .xlog_seg_size );
1644+ if (!XLogRecPtrIsInvalid (res ))
1645+ {
1646+ /* LSN of the prior record was found */
1647+ elog (LOG , "Found prior LSN: %X/%X, it is used as stop LSN" ,
1648+ (uint32 ) (res >> 32 ), (uint32 ) res );
1649+ return res ;
1650+ }
16291651 }
16301652 }
16311653
@@ -1748,6 +1770,7 @@ pg_stop_backup(pgBackup *backup)
17481770 size_t len ;
17491771 char * val = NULL ;
17501772 char * stop_backup_query = NULL ;
1773+ bool stop_lsn_exists = false;
17511774
17521775 /*
17531776 * We will use this values if there are no transactions between start_lsn
@@ -1826,15 +1849,23 @@ pg_stop_backup(pgBackup *backup)
18261849#endif
18271850 " labelfile,"
18281851 " spcmapfile"
1852+ #if PG_VERSION_NUM >= 100000
1853+ " FROM pg_catalog.pg_stop_backup(false, false)" ;
1854+ #else
18291855 " FROM pg_catalog.pg_stop_backup(false)" ;
1856+ #endif
18301857 else
18311858 stop_backup_query = "SELECT"
18321859 " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
18331860 " current_timestamp(0)::timestamptz,"
18341861 " lsn,"
18351862 " labelfile,"
18361863 " spcmapfile"
1864+ #if PG_VERSION_NUM >= 100000
1865+ " FROM pg_catalog.pg_stop_backup(false, false)" ;
1866+ #else
18371867 " FROM pg_catalog.pg_stop_backup(false)" ;
1868+ #endif
18381869
18391870 }
18401871 else
@@ -1923,7 +1954,29 @@ pg_stop_backup(pgBackup *backup)
19231954 if (!XRecOffIsValid (stop_backup_lsn ))
19241955 {
19251956 if (XRecOffIsNull (stop_backup_lsn ))
1926- stop_backup_lsn = stop_backup_lsn + SizeOfXLogLongPHD ;
1957+ {
1958+ char * xlog_path ,
1959+ stream_xlog_path [MAXPGPATH ];
1960+
1961+ if (stream_wal )
1962+ {
1963+ pgBackupGetPath2 (backup , stream_xlog_path ,
1964+ lengthof (stream_xlog_path ),
1965+ DATABASE_DIR , PG_XLOG_DIR );
1966+ xlog_path = stream_xlog_path ;
1967+ }
1968+ else
1969+ xlog_path = arclog_path ;
1970+
1971+ stop_backup_lsn = get_last_wal_lsn (xlog_path , backup -> start_lsn ,
1972+ stop_backup_lsn , backup -> tli ,
1973+ true, instance_config .xlog_seg_size );
1974+ /*
1975+ * Do not check existence of LSN again below using
1976+ * wait_wal_lsn().
1977+ */
1978+ stop_lsn_exists = true;
1979+ }
19271980 else
19281981 elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
19291982 (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
@@ -2029,13 +2082,15 @@ pg_stop_backup(pgBackup *backup)
20292082 stream_xlog_path [MAXPGPATH ];
20302083
20312084 /* Wait for stop_lsn to be received by replica */
2032- if (current .from_replica )
2033- wait_replica_wal_lsn (stop_backup_lsn , false);
2085+ /* XXX Do we need this? */
2086+ // if (current.from_replica)
2087+ // wait_replica_wal_lsn(stop_backup_lsn, false);
20342088 /*
20352089 * Wait for stop_lsn to be archived or streamed.
20362090 * We wait for stop_lsn in stream mode just in case.
20372091 */
2038- wait_wal_lsn (stop_backup_lsn , false, false);
2092+ if (!stop_lsn_exists )
2093+ stop_backup_lsn = wait_wal_lsn (stop_backup_lsn , false, false);
20392094
20402095 if (stream_wal )
20412096 {
@@ -2623,7 +2678,7 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
26232678
26242679 if (!XLogRecPtrIsInvalid (stop_backup_lsn ))
26252680 {
2626- if (xlogpos > stop_backup_lsn )
2681+ if (xlogpos >= stop_backup_lsn )
26272682 {
26282683 stop_stream_lsn = xlogpos ;
26292684 return true;
0 commit comments