@@ -56,7 +56,7 @@ static void pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, PGNode
56
56
57
57
static XLogRecPtr wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , TimeLineID tli ,
58
58
bool in_prev_segment , bool segment_only ,
59
- int timeout_elevel , bool in_stream_dir );
59
+ int timeout_elevel , bool in_stream_dir , pgBackup * backup );
60
60
61
61
static void check_external_for_tablespaces (parray * external_list ,
62
62
PGconn * backup_conn );
@@ -165,8 +165,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
165
165
"trying to look up on previous timelines" ,
166
166
current .tli );
167
167
168
- /* TODO: use read_timeline_history */
169
- tli_list = catalog_get_timelines (& instance_config );
168
+ tli_list = get_history_streaming (& instance_config .conn_opt , current .tli , backup_list );
169
+ if (!tli_list )
170
+ /* fall back to using the archive */
171
+ tli_list = catalog_get_timelines (& instance_config );
170
172
171
173
if (parray_num (tli_list ) == 0 )
172
174
elog (WARNING , "Cannot find valid backup on previous timelines, "
@@ -268,7 +270,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
268
270
* Because WAL streaming will start after pg_start_backup() in stream
269
271
* mode.
270
272
*/
271
- wait_wal_lsn (current .start_lsn , true, current .tli , false, true, ERROR , false);
273
+ wait_wal_lsn (current .start_lsn , true, current .tli , false, true, ERROR , false, & current );
272
274
}
273
275
274
276
/* start stream replication */
@@ -279,6 +281,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
279
281
280
282
start_WAL_streaming (backup_conn , dst_backup_path , & instance_config .conn_opt ,
281
283
current .start_lsn , current .tli );
284
+
285
+ /* Make sure that WAL streaming is working.
286
+ * A PAGE backup in stream mode waits twice: first for the
287
+ * segment in the WAL archive and then for the streamed segment.
288
+ */
289
+ wait_wal_lsn (current .start_lsn , true, current .tli , false, true, ERROR , true, & current );
282
290
}
283
291
284
292
/* initialize backup's file list */
@@ -1262,7 +1270,7 @@ pg_is_superuser(PGconn *conn)
1262
1270
static XLogRecPtr
1263
1271
wait_wal_lsn (XLogRecPtr target_lsn , bool is_start_lsn , TimeLineID tli ,
1264
1272
bool in_prev_segment , bool segment_only ,
1265
- int timeout_elevel , bool in_stream_dir )
1273
+ int timeout_elevel , bool in_stream_dir , pgBackup * backup )
1266
1274
{
1267
1275
XLogSegNo targetSegNo ;
1268
1276
char pg_wal_dir [MAXPGPATH ];
@@ -1294,15 +1302,14 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli,
1294
1302
*/
1295
1303
if (in_stream_dir )
1296
1304
{
1297
- pgBackupGetPath2 (& current , pg_wal_dir , lengthof (pg_wal_dir ),
1298
- DATABASE_DIR , PG_XLOG_DIR );
1305
+ join_path_components (pg_wal_dir , backup -> database_dir , PG_XLOG_DIR );
1299
1306
join_path_components (wal_segment_path , pg_wal_dir , wal_segment );
1300
1307
wal_segment_dir = pg_wal_dir ;
1301
1308
}
1302
1309
else
1303
1310
{
1304
1311
join_path_components (wal_segment_path , arclog_path , wal_segment );
1305
- wal_segment_dir = arclog_path ;
1312
+ wal_segment_dir = arclog_path ; /* global var */
1306
1313
}
1307
1314
1308
1315
/* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */
@@ -1394,7 +1401,7 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli,
1394
1401
1395
1402
sleep (1 );
1396
1403
if (interrupted )
1397
- elog (ERROR , "Interrupted during waiting for WAL archiving" );
1404
+ elog (ERROR , "Interrupted during waiting for WAL %s" , in_stream_dir ? "streaming" : " archiving" );
1398
1405
try_count ++ ;
1399
1406
1400
1407
/* Inform user if WAL segment is absent in first attempt */
@@ -1418,9 +1425,10 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli,
1418
1425
{
1419
1426
if (file_exists )
1420
1427
elog (timeout_elevel , "WAL segment %s was %s, "
1421
- "but target LSN %X/%X could not be archived in %d seconds" ,
1428
+ "but target LSN %X/%X could not be %s in %d seconds" ,
1422
1429
wal_segment , wal_delivery_str ,
1423
- (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn , timeout );
1430
+ (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn ,
1431
+ wal_delivery_str , timeout );
1424
1432
/* If WAL segment doesn't exist or we wait for previous segment */
1425
1433
else
1426
1434
elog (timeout_elevel ,
@@ -1572,8 +1580,13 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1572
1580
*/
1573
1581
if (pg_stop_backup_is_sent && !in_cleanup )
1574
1582
{
1583
+ int timeout = ARCHIVE_TIMEOUT_DEFAULT ;
1575
1584
res = NULL ;
1576
1585
1586
+ /* kludge against some old bug in archive_timeout. TODO: remove in 3.0.0 */
1587
+ if (instance_config .archive_timeout > 0 )
1588
+ timeout = instance_config .archive_timeout ;
1589
+
1577
1590
while (1 )
1578
1591
{
1579
1592
if (!PQconsumeInput (conn ))
@@ -1598,11 +1611,10 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1598
1611
* If postgres haven't answered in archive_timeout seconds,
1599
1612
* send an interrupt.
1600
1613
*/
1601
- if (pg_stop_backup_timeout > instance_config . archive_timeout )
1614
+ if (pg_stop_backup_timeout > timeout )
1602
1615
{
1603
1616
pgut_cancel (conn );
1604
- elog (ERROR , "pg_stop_backup doesn't answer in %d seconds, cancel it" ,
1605
- instance_config .archive_timeout );
1617
+ elog (ERROR , "pg_stop_backup doesn't answer in %d seconds, cancel it" , timeout );
1606
1618
}
1607
1619
}
1608
1620
else
@@ -1701,7 +1713,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1701
1713
{
1702
1714
/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
1703
1715
wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1704
- false, true, WARNING , stream_wal );
1716
+ false, true, WARNING , stream_wal , backup );
1705
1717
1706
1718
/* Get the first record in segment with current stop_lsn */
1707
1719
lsn_tmp = get_first_record_lsn (xlog_path , segno , backup -> tli ,
@@ -1729,7 +1741,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1729
1741
* because previous record can be the contrecord.
1730
1742
*/
1731
1743
lsn_tmp = wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1732
- true, false, ERROR , stream_wal );
1744
+ true, false, ERROR , stream_wal , backup );
1733
1745
1734
1746
/* sanity */
1735
1747
if (!XRecOffIsValid (lsn_tmp ) || XLogRecPtrIsInvalid (lsn_tmp ))
@@ -1743,7 +1755,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1743
1755
{
1744
1756
/* Wait for segment with current stop_lsn */
1745
1757
wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1746
- false, true, ERROR , stream_wal );
1758
+ false, true, ERROR , stream_wal , backup );
1747
1759
1748
1760
/* Get the next closest record in segment with current stop_lsn */
1749
1761
lsn_tmp = get_next_record_lsn (xlog_path , segno , backup -> tli ,
@@ -1872,7 +1884,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1872
1884
*/
1873
1885
if (!stop_lsn_exists )
1874
1886
stop_backup_lsn = wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1875
- false, false, ERROR , stream_wal );
1887
+ false, false, ERROR , stream_wal , backup );
1876
1888
1877
1889
if (stream_wal )
1878
1890
{
0 commit comments