Skip to content

Commit 2e43611

Browse files
committed
PGPRO-1311: Do not call exit() for thread, use pthread_exit() instead
1 parent f25ae41 commit 2e43611

File tree

7 files changed

+119
-21
lines changed

7 files changed

+119
-21
lines changed

src/backup.c

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,22 @@ const char *progname = "pg_probackup";
4747
static parray *backup_files_list = NULL;
4848

4949
static pthread_mutex_t start_stream_mut = PTHREAD_MUTEX_INITIALIZER;
50+
5051
/*
5152
* We need to wait end of WAL streaming before execute pg_stop_backup().
5253
*/
54+
typedef struct
55+
{
56+
const char *basedir;
57+
/*
58+
* Return value from the thread.
59+
* 0 means there is no error, 1 - there is an error.
60+
*/
61+
int ret;
62+
} StreamThreadArg;
63+
5364
static pthread_t stream_thread;
65+
static StreamThreadArg stream_thread_arg = {"", 1};
5466

5567
static int is_ptrack_enable = false;
5668
bool is_ptrack_support = false;
@@ -424,6 +436,9 @@ remote_backup_files(void *arg)
424436
file->path, (unsigned long) file->write_size);
425437
PQfinish(file_backup_conn);
426438
}
439+
440+
/* Data files transferring is successful */
441+
arguments->ret = 0;
427442
}
428443

429444
/*
@@ -441,6 +456,7 @@ do_backup_instance(void)
441456

442457
pthread_t backup_threads[num_threads];
443458
backup_files_args *backup_threads_args[num_threads];
459+
bool backup_isok = true;
444460

445461
pgBackup *prev_backup = NULL;
446462
char prev_backup_filelist_path[MAXPGPATH];
@@ -541,8 +557,13 @@ do_backup_instance(void)
541557
join_path_components(dst_backup_path, database_path, PG_XLOG_DIR);
542558
dir_create_dir(dst_backup_path, DIR_PERMISSION);
543559

560+
stream_thread_arg.basedir = dst_backup_path;
561+
/* By default there are some error */
562+
stream_thread_arg.ret = 1;
563+
544564
pthread_mutex_lock(&start_stream_mut);
545-
pthread_create(&stream_thread, NULL, (void *(*)(void *)) StreamLog, dst_backup_path);
565+
pthread_create(&stream_thread, NULL, (void *(*)(void *)) StreamLog,
566+
&stream_thread_arg);
546567
pthread_mutex_lock(&start_stream_mut);
547568
if (conn == NULL)
548569
elog(ERROR, "Cannot continue backup because stream connect has failed.");
@@ -653,6 +674,8 @@ do_backup_instance(void)
653674
arg->prev_backup_start_lsn = prev_backup_start_lsn;
654675
arg->thread_backup_conn = NULL;
655676
arg->thread_cancel_conn = NULL;
677+
/* By default there are some error */
678+
arg->ret = 1;
656679
backup_threads_args[i] = arg;
657680
}
658681

@@ -676,9 +699,15 @@ do_backup_instance(void)
676699
for (i = 0; i < num_threads; i++)
677700
{
678701
pthread_join(backup_threads[i], NULL);
702+
if (backup_threads_args[i]->ret == 1)
703+
backup_isok = false;
704+
679705
pg_free(backup_threads_args[i]);
680706
}
681-
elog(LOG, "Data files are transfered");
707+
if (backup_isok)
708+
elog(LOG, "Data files are transfered");
709+
else
710+
elog(ERROR, "Data files transferring failed");
682711

683712
/* clean previous backup file list */
684713
if (prev_backup_filelist)
@@ -1784,8 +1813,12 @@ pg_stop_backup(pgBackup *backup)
17841813
PQclear(res);
17851814

17861815
if (stream_wal)
1816+
{
17871817
/* Wait for the completion of stream */
17881818
pthread_join(stream_thread, NULL);
1819+
if (stream_thread_arg.ret == 1)
1820+
elog(ERROR, "WAL streaming failed");
1821+
}
17891822
}
17901823

17911824
/* Fill in fields if that is the correct end of backup. */
@@ -2025,6 +2058,8 @@ backup_files(void *arg)
20252058
if (arguments->thread_backup_conn)
20262059
pgut_disconnect(arguments->thread_backup_conn);
20272060

2061+
/* Data files transferring is successful */
2062+
arguments->ret = 0;
20282063
}
20292064

20302065
/*
@@ -2616,7 +2651,7 @@ StreamLog(void *arg)
26162651
{
26172652
XLogRecPtr startpos;
26182653
TimeLineID starttli;
2619-
char *basedir = (char *)arg;
2654+
StreamThreadArg *stream_arg = (StreamThreadArg *) arg;
26202655

26212656
/*
26222657
* Connect in replication mode to the server
@@ -2682,7 +2717,7 @@ StreamLog(void *arg)
26822717
ctl.sysidentifier = NULL;
26832718

26842719
#if PG_VERSION_NUM >= 100000
2685-
ctl.walmethod = CreateWalDirectoryMethod(basedir, 0, true);
2720+
ctl.walmethod = CreateWalDirectoryMethod(stream_arg->basedir, 0, true);
26862721
ctl.replication_slot = replication_slot;
26872722
ctl.stop_socket = PGINVALID_SOCKET;
26882723
#else
@@ -2713,6 +2748,7 @@ StreamLog(void *arg)
27132748

27142749
elog(LOG, _("finished streaming WAL at %X/%X (timeline %u)"),
27152750
(uint32) (stop_stream_lsn >> 32), (uint32) stop_stream_lsn, starttli);
2751+
stream_arg->ret = 0;
27162752

27172753
PQfinish(conn);
27182754
conn = NULL;

src/pg_probackup.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -241,19 +241,25 @@ typedef struct pgRecoveryTarget
241241
/* Union to ease operations on relation pages */
242242
typedef union DataPage
243243
{
244-
PageHeaderData page_data;
245-
char data[BLCKSZ];
244+
PageHeaderData page_data;
245+
char data[BLCKSZ];
246246
} DataPage;
247247

248248
typedef struct
249249
{
250250
const char *from_root;
251251
const char *to_root;
252-
parray *backup_files_list;
253-
parray *prev_backup_filelist;
254-
XLogRecPtr prev_backup_start_lsn;
255-
PGconn *thread_backup_conn;
256-
PGcancel *thread_cancel_conn;
252+
parray *backup_files_list;
253+
parray *prev_backup_filelist;
254+
XLogRecPtr prev_backup_start_lsn;
255+
PGconn *thread_backup_conn;
256+
PGcancel *thread_cancel_conn;
257+
258+
/*
259+
* Return value from the thread.
260+
* 0 means there is no error, 1 - there is an error.
261+
*/
262+
int ret;
257263
} backup_files_args;
258264

259265
/*

src/restore.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ typedef struct
2222
{
2323
parray *files;
2424
pgBackup *backup;
25+
26+
/*
27+
* Return value from the thread.
28+
* 0 means there is no error, 1 - there is an error.
29+
*/
30+
int ret;
2531
} restore_files_args;
2632

2733
/* Tablespace mapping structures */
@@ -374,6 +380,7 @@ restore_backup(pgBackup *backup)
374380
int i;
375381
pthread_t restore_threads[num_threads];
376382
restore_files_args *restore_threads_args[num_threads];
383+
bool restore_isok = true;
377384

378385
if (backup->status != BACKUP_STATUS_OK)
379386
elog(ERROR, "Backup %s cannot be restored because it is not valid",
@@ -419,19 +426,27 @@ restore_backup(pgBackup *backup)
419426
restore_files_args *arg = pg_malloc(sizeof(restore_files_args));
420427
arg->files = files;
421428
arg->backup = backup;
429+
/* By default there are some error */
430+
arg->ret = 1;
422431

423432
elog(LOG, "Start thread for num:%li", parray_num(files));
424433

425434
restore_threads_args[i] = arg;
426-
pthread_create(&restore_threads[i], NULL, (void *(*)(void *)) restore_files, arg);
435+
pthread_create(&restore_threads[i], NULL,
436+
(void *(*)(void *)) restore_files, arg);
427437
}
428438

429439
/* Wait theads */
430440
for (i = 0; i < num_threads; i++)
431441
{
432442
pthread_join(restore_threads[i], NULL);
443+
if (restore_threads_args[i]->ret == 1)
444+
restore_isok = false;
445+
433446
pg_free(restore_threads_args[i]);
434447
}
448+
if (!restore_isok)
449+
elog(ERROR, "Data files restoring failed");
435450

436451
/* cleanup */
437452
parray_walk(files, pgFileFree);
@@ -777,6 +792,9 @@ restore_files(void *arg)
777792
elog(LOG, "Restored file %s : %lu bytes",
778793
file->path, (unsigned long) file->write_size);
779794
}
795+
796+
/* Data files restoring is successful */
797+
arguments->ret = 0;
780798
}
781799

782800
static void

src/utils/logger.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ elog_internal(int elevel, bool file_only, const char *fmt, va_list args)
142142
* There is no need to lock if this is elog() from upper elog() and
143143
* logging is not initialized.
144144
*/
145-
if (write_to_file || write_to_error_log)
145+
if (write_to_file || write_to_error_log || write_to_stderr)
146146
pthread_mutex_lock(&log_file_mutex);
147147

148148
/* We need copy args only if we need write to error log file */
@@ -228,12 +228,25 @@ elog_internal(int elevel, bool file_only, const char *fmt, va_list args)
228228
va_end(std_args);
229229
}
230230

231-
if (write_to_file || write_to_error_log)
231+
if (write_to_file || write_to_error_log || write_to_stderr)
232232
pthread_mutex_unlock(&log_file_mutex);
233233

234-
/* Exit with code if it is an error */
235-
if (elevel > WARNING)
236-
exit(elevel);
234+
/*
235+
* Exit with code if it is an error.
236+
* Check for in_cleanup flag to avoid deadlock in case of ERROR in cleanup
237+
* routines.
238+
*/
239+
if (elevel > WARNING && !in_cleanup)
240+
{
241+
/* Interrupt other possible routines */
242+
interrupted = true;
243+
244+
/* If this is not the main thread then don't call exit() */
245+
if (!pthread_equal(main_tid, pthread_self()))
246+
pthread_exit(NULL);
247+
else
248+
exit(elevel);
249+
}
237250
}
238251

239252
/*

src/utils/pgut.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ static char *password = NULL;
4242
bool prompt_password = true;
4343
bool force_password = false;
4444

45+
pthread_t main_tid = 0;
46+
4547
/* Database connections */
4648
static PGcancel *volatile cancel_conn = NULL;
4749

@@ -1065,6 +1067,7 @@ pgut_getopt(int argc, char **argv, pgut_option options[])
10651067

10661068
init_cancel_handler();
10671069
atexit(on_cleanup);
1070+
main_tid = pthread_self();
10681071

10691072
return optind;
10701073
}

src/utils/pgut.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "pqexpbuffer.h"
1616

1717
#include <assert.h>
18+
#include <pthread.h>
1819
#include <sys/time.h>
1920

2021
#include "logger.h"
@@ -93,6 +94,9 @@ extern const char *PROGRAM_VERSION;
9394
extern const char *PROGRAM_URL;
9495
extern const char *PROGRAM_EMAIL;
9596

97+
/* ID of the main thread */
98+
extern pthread_t main_tid;
99+
96100
extern void pgut_help(bool details);
97101

98102
/*

src/validate.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ typedef struct
2323
{
2424
parray *files;
2525
bool corrupted;
26+
27+
/*
28+
* Return value from the thread.
29+
* 0 means there is no error, 1 - there is an error.
30+
*/
31+
int ret;
2632
} validate_files_args;
2733

2834
/*
@@ -35,6 +41,7 @@ pgBackupValidate(pgBackup *backup)
3541
char path[MAXPGPATH];
3642
parray *files;
3743
bool corrupted = false;
44+
bool validation_isok = true;
3845
pthread_t validate_threads[num_threads];
3946
validate_files_args *validate_threads_args[num_threads];
4047
int i;
@@ -79,8 +86,12 @@ pgBackupValidate(pgBackup *backup)
7986
validate_files_args *arg = pg_malloc(sizeof(validate_files_args));
8087
arg->files = files;
8188
arg->corrupted = false;
89+
/* By default there are some error */
90+
arg->ret = 1;
91+
8292
validate_threads_args[i] = arg;
83-
pthread_create(&validate_threads[i], NULL, (void *(*)(void *)) pgBackupValidateFiles, arg);
93+
pthread_create(&validate_threads[i], NULL,
94+
(void *(*)(void *)) pgBackupValidateFiles, arg);
8495
}
8596

8697
/* Wait theads */
@@ -89,8 +100,12 @@ pgBackupValidate(pgBackup *backup)
89100
pthread_join(validate_threads[i], NULL);
90101
if (validate_threads_args[i]->corrupted)
91102
corrupted = true;
103+
if (validate_threads_args[i]->ret == 1)
104+
validation_isok = false;
92105
pg_free(validate_threads_args[i]);
93106
}
107+
if (!validation_isok)
108+
elog(ERROR, "Data files validation failed");
94109

95110
/* cleanup */
96111
parray_walk(files, pgFileFree);
@@ -159,7 +174,7 @@ pgBackupValidateFiles(void *arg)
159174
elog(WARNING, "Cannot stat backup file \"%s\": %s",
160175
file->path, strerror(errno));
161176
arguments->corrupted = true;
162-
return;
177+
break;
163178
}
164179

165180
if (file->write_size != st.st_size)
@@ -168,7 +183,7 @@ pgBackupValidateFiles(void *arg)
168183
file->path, (unsigned long) file->write_size,
169184
(unsigned long) st.st_size);
170185
arguments->corrupted = true;
171-
return;
186+
break;
172187
}
173188

174189
crc = pgFileGetCRC(file);
@@ -177,9 +192,12 @@ pgBackupValidateFiles(void *arg)
177192
elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X",
178193
file->path, file->crc, crc);
179194
arguments->corrupted = true;
180-
return;
195+
break;
181196
}
182197
}
198+
199+
/* Data files validation is successful */
200+
arguments->ret = 0;
183201
}
184202

185203
/*

0 commit comments

Comments
 (0)