31
31
32
32
static int standby_message_timeout = 10 * 1000 ; /* 10 sec = default */
33
33
static XLogRecPtr stop_backup_lsn = InvalidXLogRecPtr ;
34
+ static XLogRecPtr stop_stream_lsn = InvalidXLogRecPtr ;
34
35
35
36
/*
36
37
* How long we should wait for streaming end in seconds.
@@ -45,11 +46,23 @@ const char *progname = "pg_probackup";
45
46
/* list of files contained in backup */
46
47
static parray * backup_files_list = NULL ;
47
48
48
- static pthread_mutex_t start_stream_mut = PTHREAD_MUTEX_INITIALIZER ;
49
49
/*
50
50
* We need to wait for the end of WAL streaming before executing pg_stop_backup().
51
51
*/
52
+ typedef struct
53
+ {
54
+ const char * basedir ;
55
+ PGconn * conn ;
56
+
57
+ /*
58
+ * Return value from the thread.
59
+ * 0 means there is no error, 1 - there is an error.
60
+ */
61
+ int ret ;
62
+ } StreamThreadArg ;
63
+
52
64
static pthread_t stream_thread ;
65
+ static StreamThreadArg stream_thread_arg = {"" , NULL , 1 };
53
66
54
67
static int is_ptrack_enable = false;
55
68
bool is_ptrack_support = false;
@@ -423,6 +436,9 @@ remote_backup_files(void *arg)
423
436
file -> path , (unsigned long ) file -> write_size );
424
437
PQfinish (file_backup_conn );
425
438
}
439
+
440
+ /* Data file transfer completed successfully */
441
+ arguments -> ret = 0 ;
426
442
}
427
443
428
444
/*
@@ -440,6 +456,7 @@ do_backup_instance(void)
440
456
441
457
pthread_t backup_threads [num_threads ];
442
458
backup_files_args * backup_threads_args [num_threads ];
459
+ bool backup_isok = true;
443
460
444
461
pgBackup * prev_backup = NULL ;
445
462
char prev_backup_filelist_path [MAXPGPATH ];
@@ -540,13 +557,40 @@ do_backup_instance(void)
540
557
join_path_components (dst_backup_path , database_path , PG_XLOG_DIR );
541
558
dir_create_dir (dst_backup_path , DIR_PERMISSION );
542
559
543
- pthread_mutex_lock (& start_stream_mut );
544
- pthread_create (& stream_thread , NULL , (void * (* )(void * )) StreamLog , dst_backup_path );
545
- pthread_mutex_lock (& start_stream_mut );
546
- if (conn == NULL )
560
+ stream_thread_arg .basedir = dst_backup_path ;
561
+
562
+ /*
563
+ * Connect in replication mode to the server.
564
+ */
565
+ stream_thread_arg .conn = pgut_connect_replication (pgut_dbname );
566
+
567
+ if (!CheckServerVersionForStreaming (stream_thread_arg .conn ))
568
+ {
569
+ PQfinish (stream_thread_arg .conn );
570
+ /*
571
+ * Error message already written in CheckServerVersionForStreaming().
572
+ * There's no hope of recovering from a version mismatch, so don't
573
+ * retry.
574
+ */
575
+ elog (ERROR , "Cannot continue backup because stream connect has failed." );
576
+ }
577
+
578
+ /*
579
+ * Identify server, obtaining start LSN position and current timeline ID
580
+ * at the same time, necessary if no valid data can be found in the
581
+ * existing output directory.
582
+ */
583
+ if (!RunIdentifySystem (stream_thread_arg .conn , NULL , NULL , NULL , NULL ))
584
+ {
585
+ PQfinish (stream_thread_arg .conn );
547
586
elog (ERROR , "Cannot continue backup because stream connect has failed." );
587
+ }
588
+
589
+ /* Assume failure until the streaming thread reports success */
590
+ stream_thread_arg .ret = 1 ;
548
591
549
- pthread_mutex_unlock (& start_stream_mut );
592
+ pthread_create (& stream_thread , NULL , (void * (* )(void * )) StreamLog ,
593
+ & stream_thread_arg );
550
594
}
551
595
552
596
/* initialize backup list */
@@ -652,6 +696,8 @@ do_backup_instance(void)
652
696
arg -> prev_backup_start_lsn = prev_backup_start_lsn ;
653
697
arg -> thread_backup_conn = NULL ;
654
698
arg -> thread_cancel_conn = NULL ;
699
+ /* Assume failure until the worker thread reports success */
700
+ arg -> ret = 1 ;
655
701
backup_threads_args [i ] = arg ;
656
702
}
657
703
@@ -675,9 +721,15 @@ do_backup_instance(void)
675
721
for (i = 0 ; i < num_threads ; i ++ )
676
722
{
677
723
pthread_join (backup_threads [i ], NULL );
724
+ if (backup_threads_args [i ]-> ret == 1 )
725
+ backup_isok = false;
726
+
678
727
pg_free (backup_threads_args [i ]);
679
728
}
680
- elog (LOG , "Data files are transfered" );
729
+ if (backup_isok )
730
+ elog (LOG , "Data files are transfered" );
731
+ else
732
+ elog (ERROR , "Data files transferring failed" );
681
733
682
734
/* clean previous backup file list */
683
735
if (prev_backup_filelist )
@@ -776,10 +828,10 @@ do_backup(time_t start_time)
776
828
is_checksum_enabled = pg_checksum_enable ();
777
829
778
830
if (is_checksum_enabled )
779
- elog (LOG , "This PostgreSQL instance initialized with data block checksums. "
831
+ elog (LOG , "This PostgreSQL instance was initialized with data block checksums. "
780
832
"Data block corruption will be detected" );
781
833
else
782
- elog (WARNING , "This PostgreSQL instance initialized without data block checksums. "
834
+ elog (WARNING , "This PostgreSQL instance was initialized without data block checksums. "
783
835
"pg_probackup have no way to detect data block corruption without them. "
784
836
"Reinitialize PGDATA with option '--data-checksums'." );
785
837
@@ -1544,7 +1596,8 @@ pg_stop_backup(pgBackup *backup)
1544
1596
FILE * fp ;
1545
1597
pgFile * file ;
1546
1598
size_t len ;
1547
- char * val = NULL ;
1599
+ char * val = NULL ;
1600
+ char * stop_backup_query = NULL ;
1548
1601
1549
1602
/*
1550
1603
* We will use this values if there are no transactions between start_lsn
@@ -1601,26 +1654,25 @@ pg_stop_backup(pgBackup *backup)
1601
1654
* pg_stop_backup(false) copy of the backup label and tablespace map
1602
1655
* so they can be written to disk by the caller.
1603
1656
*/
1604
- sent = pgut_send (conn ,
1605
- "SELECT"
1657
+ stop_backup_query = "SELECT"
1606
1658
" pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
1607
1659
" current_timestamp(0)::timestamptz,"
1608
1660
" lsn,"
1609
1661
" labelfile,"
1610
1662
" spcmapfile"
1611
- " FROM pg_catalog.pg_stop_backup(false)" ,
1612
- 0 , NULL , WARNING );
1663
+ " FROM pg_catalog.pg_stop_backup(false)" ;
1664
+
1613
1665
}
1614
1666
else
1615
1667
{
1616
1668
1617
- sent = pgut_send (conn ,
1618
- "SELECT"
1669
+ stop_backup_query = "SELECT"
1619
1670
" pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
1620
1671
" current_timestamp(0)::timestamptz,"
1621
- " pg_catalog.pg_stop_backup() as lsn" ,
1622
- 0 , NULL , WARNING );
1672
+ " pg_catalog.pg_stop_backup() as lsn" ;
1623
1673
}
1674
+
1675
+ sent = pgut_send (conn , stop_backup_query , 0 , NULL , WARNING );
1624
1676
pg_stop_backup_is_sent = true;
1625
1677
if (!sent )
1626
1678
elog (ERROR , "Failed to send pg_stop_backup query" );
@@ -1665,10 +1717,23 @@ pg_stop_backup(pgBackup *backup)
1665
1717
break ;
1666
1718
}
1667
1719
}
1720
+
1721
+ /* Check successful execution of pg_stop_backup() */
1668
1722
if (!res )
1669
1723
elog (ERROR , "pg_stop backup() failed" );
1670
1724
else
1725
+ {
1726
+ switch (PQresultStatus (res ))
1727
+ {
1728
+ case PGRES_TUPLES_OK :
1729
+ case PGRES_COMMAND_OK :
1730
+ break ;
1731
+ default :
1732
+ elog (ERROR , "query failed: %s query was: %s" ,
1733
+ PQerrorMessage (conn ), stop_backup_query );
1734
+ }
1671
1735
elog (INFO , "pg_stop backup() successfully executed" );
1736
+ }
1672
1737
1673
1738
backup_in_progress = false;
1674
1739
@@ -1771,8 +1836,12 @@ pg_stop_backup(pgBackup *backup)
1771
1836
PQclear (res );
1772
1837
1773
1838
if (stream_wal )
1839
+ {
1774
1840
/* Wait for the completion of stream */
1775
1841
pthread_join (stream_thread , NULL );
1842
+ if (stream_thread_arg .ret == 1 )
1843
+ elog (ERROR , "WAL streaming failed" );
1844
+ }
1776
1845
}
1777
1846
1778
1847
/* Fill in fields if that is the correct end of backup. */
@@ -1858,7 +1927,7 @@ backup_cleanup(bool fatal, void *userdata)
1858
1927
*/
1859
1928
if (current .status == BACKUP_STATUS_RUNNING && current .end_time == 0 )
1860
1929
{
1861
- elog (INFO , "Backup %s is running, setting its status to ERROR" ,
1930
+ elog (WARNING , "Backup %s is running, setting its status to ERROR" ,
1862
1931
base36enc (current .start_time ));
1863
1932
current .end_time = time (NULL );
1864
1933
current .status = BACKUP_STATUS_ERROR ;
@@ -1870,7 +1939,7 @@ backup_cleanup(bool fatal, void *userdata)
1870
1939
*/
1871
1940
if (backup_in_progress )
1872
1941
{
1873
- elog (LOG , "backup in progress, stop backup" );
1942
+ elog (WARNING , "backup in progress, stop backup" );
1874
1943
pg_stop_backup (NULL ); /* don't care stop_lsn on error case */
1875
1944
}
1876
1945
}
@@ -2012,6 +2081,8 @@ backup_files(void *arg)
2012
2081
if (arguments -> thread_backup_conn )
2013
2082
pgut_disconnect (arguments -> thread_backup_conn );
2014
2083
2084
+ /* Data file transfer completed successfully */
2085
+ arguments -> ret = 0 ;
2015
2086
}
2016
2087
2017
2088
/*
@@ -2548,7 +2619,7 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
2548
2619
2549
2620
/* we assume that we get called once at the end of each segment */
2550
2621
if (segment_finished )
2551
- elog (LOG , _ ("finished segment at %X/%X (timeline %u)\n " ),
2622
+ elog (VERBOSE , _ ("finished segment at %X/%X (timeline %u)" ),
2552
2623
(uint32 ) (xlogpos >> 32 ), (uint32 ) xlogpos , timeline );
2553
2624
2554
2625
/*
@@ -2566,7 +2637,10 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
2566
2637
if (!XLogRecPtrIsInvalid (stop_backup_lsn ))
2567
2638
{
2568
2639
if (xlogpos > stop_backup_lsn )
2640
+ {
2641
+ stop_stream_lsn = xlogpos ;
2569
2642
return true;
2643
+ }
2570
2644
2571
2645
/* pg_stop_backup() was executed, wait for the completion of stream */
2572
2646
if (stream_stop_timeout == 0 )
@@ -2600,45 +2674,13 @@ StreamLog(void *arg)
2600
2674
{
2601
2675
XLogRecPtr startpos ;
2602
2676
TimeLineID starttli ;
2603
- char * basedir = (char * )arg ;
2604
-
2605
- /*
2606
- * Connect in replication mode to the server
2607
- */
2608
- if (conn == NULL )
2609
- conn = pgut_connect_replication (pgut_dbname );
2610
- if (!conn )
2611
- {
2612
- pthread_mutex_unlock (& start_stream_mut );
2613
- /* Error message already written in GetConnection() */
2614
- return ;
2615
- }
2616
-
2617
- if (!CheckServerVersionForStreaming (conn ))
2618
- {
2619
- /*
2620
- * Error message already written in CheckServerVersionForStreaming().
2621
- * There's no hope of recovering from a version mismatch, so don't
2622
- * retry.
2623
- */
2624
- disconnect_and_exit (1 );
2625
- }
2626
-
2627
- /*
2628
- * Identify server, obtaining start LSN position and current timeline ID
2629
- * at the same time, necessary if not valid data can be found in the
2630
- * existing output directory.
2631
- */
2632
- if (!RunIdentifySystem (conn , NULL , & starttli , & startpos , NULL ))
2633
- disconnect_and_exit (1 );
2634
-
2635
- /* Ok we have normal stream connect and main process can work again */
2636
- pthread_mutex_unlock (& start_stream_mut );
2677
+ StreamThreadArg * stream_arg = (StreamThreadArg * ) arg ;
2637
2678
2638
2679
/*
2639
2680
* We must use startpos as start_lsn from start_backup
2640
2681
*/
2641
2682
startpos = current .start_lsn ;
2683
+ starttli = current .tli ;
2642
2684
2643
2685
/*
2644
2686
* Always start streaming at the beginning of a segment
@@ -2652,7 +2694,7 @@ StreamLog(void *arg)
2652
2694
/*
2653
2695
* Start the replication
2654
2696
*/
2655
- elog (LOG , _ ("starting log streaming at %X/%X (timeline %u)\n " ),
2697
+ elog (LOG , _ ("started streaming WAL at %X/%X (timeline %u)" ),
2656
2698
(uint32 ) (startpos >> 32 ), (uint32 ) startpos , starttli );
2657
2699
2658
2700
#if PG_VERSION_NUM >= 90600
@@ -2666,11 +2708,11 @@ StreamLog(void *arg)
2666
2708
ctl .sysidentifier = NULL ;
2667
2709
2668
2710
#if PG_VERSION_NUM >= 100000
2669
- ctl .walmethod = CreateWalDirectoryMethod (basedir , 0 , true);
2711
+ ctl .walmethod = CreateWalDirectoryMethod (stream_arg -> basedir , 0 , true);
2670
2712
ctl .replication_slot = replication_slot ;
2671
2713
ctl .stop_socket = PGINVALID_SOCKET ;
2672
2714
#else
2673
- ctl .basedir = basedir ;
2715
+ ctl .basedir = ( char * ) stream_arg -> basedir ;
2674
2716
#endif
2675
2717
2676
2718
ctl .stream_stop = stop_streaming ;
@@ -2679,7 +2721,7 @@ StreamLog(void *arg)
2679
2721
ctl .synchronous = false;
2680
2722
ctl .mark_done = false;
2681
2723
2682
- if (ReceiveXlogStream (conn , & ctl ) == false)
2724
+ if (ReceiveXlogStream (stream_arg -> conn , & ctl ) == false)
2683
2725
elog (ERROR , "Problem in receivexlog" );
2684
2726
2685
2727
#if PG_VERSION_NUM >= 100000
@@ -2689,14 +2731,18 @@ StreamLog(void *arg)
2689
2731
#endif
2690
2732
}
2691
2733
#else
2692
- if (ReceiveXlogStream (conn , startpos , starttli , NULL , basedir ,
2693
- stop_streaming , standby_message_timeout , NULL ,
2694
- false, false) == false)
2734
+ if (ReceiveXlogStream (stream_arg -> conn , startpos , starttli , NULL , basedir ,
2735
+ stop_streaming , standby_message_timeout , NULL ,
2736
+ false, false) == false)
2695
2737
elog (ERROR , "Problem in receivexlog" );
2696
2738
#endif
2697
2739
2698
- PQfinish (conn );
2699
- conn = NULL ;
2740
+ elog (LOG , _ ("finished streaming WAL at %X/%X (timeline %u)" ),
2741
+ (uint32 ) (stop_stream_lsn >> 32 ), (uint32 ) stop_stream_lsn , starttli );
2742
+ stream_arg -> ret = 0 ;
2743
+
2744
+ PQfinish (stream_arg -> conn );
2745
+ stream_arg -> conn = NULL ;
2700
2746
}
2701
2747
2702
2748
/*
0 commit comments