Skip to content

Commit 0076462

Browse files
committed
Merge branch 'pgpro-1286-no-validation-restore'
2 parents 2650fdb + 5bed18c commit 0076462

File tree

5 files changed

+219
-54
lines changed

5 files changed

+219
-54
lines changed

src/help.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ help_restore(void)
268268
printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n"));
269269
printf(_(" [--immediate] [--recovery-target-name=target-name]\n"));
270270
printf(_(" [--recovery-target-action=pause|promote|shutdown]\n"));
271-
printf(_(" [--restore-as-replica]\n\n"));
271+
printf(_(" [--restore-as-replica] [--no-validate]\n\n"));
272272

273273
printf(_(" -B, --backup-path=backup-path location of the backup storage area\n"));
274274
printf(_(" --instance=instance_name name of the instance\n"));
@@ -285,6 +285,7 @@ help_restore(void)
285285
printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n"));
286286

287287
printf(_(" --immediate end recovery as soon as a consistent state is reached\n"));
288+
printf(_(" --no-validate disable backup validation during recovery\n"));
288289
printf(_(" --recovery-target-name=target-name\n"));
289290
printf(_(" the named restore point to which recovery will proceed\n"));
290291
printf(_(" --recovery-target-action=pause|promote|shutdown\n"));

src/pg_probackup.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ static char *target_action = NULL;
7070
static pgRecoveryTarget *recovery_target_options = NULL;
7171

7272
bool restore_as_replica = false;
73+
bool restore_no_validate = false;
7374

7475
/* delete options */
7576
bool delete_wal = false;
@@ -148,6 +149,7 @@ static pgut_option options[] =
148149
{ 's', 25, "recovery-target-name", &target_name, SOURCE_CMDLINE },
149150
{ 's', 26, "recovery-target-action", &target_action, SOURCE_CMDLINE },
150151
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE },
152+
{ 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE },
151153
/* delete options */
152154
{ 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE },
153155
{ 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE },
@@ -435,7 +437,7 @@ main(int argc, char *argv[])
435437
/* parse all recovery target options into recovery_target_options structure */
436438
recovery_target_options = parseRecoveryTargetOptions(target_time, target_xid,
437439
target_inclusive, target_tli, target_immediate,
438-
target_name, target_action);
440+
target_name, target_action, restore_no_validate);
439441
}
440442

441443
if (num_threads < 1)

src/pg_probackup.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ typedef struct pgRecoveryTarget
264264
bool recovery_target_immediate;
265265
const char *recovery_target_name;
266266
const char *recovery_target_action;
267+
bool restore_no_validate;
267268
} pgRecoveryTarget;
268269

269270
/* Union to ease operations on relation pages */
@@ -397,7 +398,7 @@ extern parray * readTimeLineHistory_probackup(TimeLineID targetTLI);
397398
extern pgRecoveryTarget *parseRecoveryTargetOptions(
398399
const char *target_time, const char *target_xid,
399400
const char *target_inclusive, TimeLineID target_tli, bool target_immediate,
400-
const char *target_name, const char *target_action);
401+
const char *target_name, const char *target_action, bool restore_no_validate);
401402

402403
extern void opt_tablespace_map(pgut_option *opt, const char *arg);
403404

src/restore.c

Lines changed: 66 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -243,66 +243,69 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
243243
if (is_restore)
244244
check_tablespace_mapping(dest_backup);
245245

246-
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
247-
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
248-
249-
/*
250-
* Validate backups from base_full_backup to dest_backup.
251-
*/
252-
for (i = base_full_backup_index; i >= dest_backup_index; i--)
246+
if (!is_restore || !rt->restore_no_validate)
253247
{
254-
pgBackup *backup = (pgBackup *) parray_get(backups, i);
255-
pgBackupValidate(backup);
256-
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
257-
if (backup->status == BACKUP_STATUS_CORRUPT)
258-
{
259-
corrupted_backup = backup;
260-
corrupted_backup_index = i;
261-
break;
262-
}
263-
/* We do not validate WAL files of intermediate backups
264-
* It`s done to speed up restore
265-
*/
266-
}
267-
/* There is no point in wal validation
268-
* if there is corrupted backup between base_backup and dest_backup
269-
*/
270-
if (!corrupted_backup)
248+
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
249+
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
250+
271251
/*
272-
* Validate corresponding WAL files.
273-
* We pass base_full_backup timeline as last argument to this function,
274-
* because it's needed to form the name of xlog file.
252+
* Validate backups from base_full_backup to dest_backup.
275253
*/
276-
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
277-
rt->recovery_target_xid, base_full_backup->tli);
278-
279-
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
280-
if (corrupted_backup)
281-
{
282-
for (i = corrupted_backup_index - 1; i >= 0; i--)
254+
for (i = base_full_backup_index; i >= dest_backup_index; i--)
283255
{
284256
pgBackup *backup = (pgBackup *) parray_get(backups, i);
285-
/* Mark incremental OK backup as orphan */
286-
if (backup->backup_mode == BACKUP_MODE_FULL)
257+
pgBackupValidate(backup);
258+
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
259+
if (backup->status == BACKUP_STATUS_CORRUPT)
260+
{
261+
corrupted_backup = backup;
262+
corrupted_backup_index = i;
287263
break;
288-
if (backup->status != BACKUP_STATUS_OK)
289-
continue;
290-
else
264+
}
265+
/* We do not validate WAL files of intermediate backups
266+
* It's done to speed up restore
267+
*/
268+
}
269+
/* There is no point in wal validation
270+
* if there is corrupted backup between base_backup and dest_backup
271+
*/
272+
if (!corrupted_backup)
273+
/*
274+
* Validate corresponding WAL files.
275+
* We pass base_full_backup timeline as last argument to this function,
276+
* because it's needed to form the name of xlog file.
277+
*/
278+
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
279+
rt->recovery_target_xid, base_full_backup->tli);
280+
281+
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
282+
if (corrupted_backup)
283+
{
284+
for (i = corrupted_backup_index - 1; i >= 0; i--)
291285
{
292-
char *backup_id,
293-
*corrupted_backup_id;
286+
pgBackup *backup = (pgBackup *) parray_get(backups, i);
287+
/* Mark incremental OK backup as orphan */
288+
if (backup->backup_mode == BACKUP_MODE_FULL)
289+
break;
290+
if (backup->status != BACKUP_STATUS_OK)
291+
continue;
292+
else
293+
{
294+
char *backup_id,
295+
*corrupted_backup_id;
294296

295-
backup->status = BACKUP_STATUS_ORPHAN;
296-
pgBackupWriteBackupControlFile(backup);
297+
backup->status = BACKUP_STATUS_ORPHAN;
298+
pgBackupWriteBackupControlFile(backup);
297299

298-
backup_id = base36enc_dup(backup->start_time);
299-
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
300+
backup_id = base36enc_dup(backup->start_time);
301+
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
300302

301-
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
302-
backup_id, corrupted_backup_id);
303+
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
304+
backup_id, corrupted_backup_id);
303305

304-
free(backup_id);
305-
free(corrupted_backup_id);
306+
free(backup_id);
307+
free(corrupted_backup_id);
308+
}
306309
}
307310
}
308311
}
@@ -312,7 +315,12 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
312315
* produce corresponding error message
313316
*/
314317
if (dest_backup->status == BACKUP_STATUS_OK)
315-
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
318+
{
319+
if (rt->restore_no_validate)
320+
elog(INFO, "Backup %s is used without validation.", base36enc(dest_backup->start_time));
321+
else
322+
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
323+
}
316324
else if (dest_backup->status == BACKUP_STATUS_CORRUPT)
317325
elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time));
318326
else if (dest_backup->status == BACKUP_STATUS_ORPHAN)
@@ -1003,7 +1011,8 @@ parseRecoveryTargetOptions(const char *target_time,
10031011
TimeLineID target_tli,
10041012
bool target_immediate,
10051013
const char *target_name,
1006-
const char *target_action)
1014+
const char *target_action,
1015+
bool restore_no_validate)
10071016
{
10081017
time_t dummy_time;
10091018
TransactionId dummy_xid;
@@ -1028,6 +1037,7 @@ parseRecoveryTargetOptions(const char *target_time,
10281037
rt->recovery_target_immediate = false;
10291038
rt->recovery_target_name = NULL;
10301039
rt->recovery_target_action = NULL;
1040+
rt->restore_no_validate = false;
10311041

10321042
/* parse given options */
10331043
if (target_time)
@@ -1074,6 +1084,11 @@ parseRecoveryTargetOptions(const char *target_time,
10741084
rt->recovery_target_immediate = target_immediate;
10751085
}
10761086

1087+
if (restore_no_validate)
1088+
{
1089+
rt->restore_no_validate = restore_no_validate;
1090+
}
1091+
10771092
if (target_name)
10781093
{
10791094
recovery_target_specified++;

tests/validate_test.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,93 @@ def test_validate_instance_with_corrupted_page(self):
748748
# Clean after yourself
749749
self.del_test_dir(module_name, fname)
750750

751+
# @unittest.skip("skip")
752+
def test_validate_instance_with_corrupted_full_and_try_restore(self):
753+
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
754+
corrupt file in FULL backup and run validate on instance,
755+
expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN,
756+
try to restore backup with --no-validate option"""
757+
fname = self.id().split('.')[3]
758+
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
759+
initdb_params=['--data-checksums'],
760+
pg_options={'wal_level': 'replica'}
761+
)
762+
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
763+
self.init_pb(backup_dir)
764+
self.add_instance(backup_dir, 'node', node)
765+
self.set_archiving(backup_dir, 'node', node)
766+
node.start()
767+
768+
node.safe_psql(
769+
"postgres",
770+
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
771+
file_path_t_heap = node.safe_psql(
772+
"postgres",
773+
"select pg_relation_filepath('t_heap')").rstrip()
774+
# FULL1
775+
backup_id_1 = self.backup_node(backup_dir, 'node', node)
776+
777+
node.safe_psql(
778+
"postgres",
779+
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
780+
# PAGE1
781+
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
782+
783+
# PAGE2
784+
node.safe_psql(
785+
"postgres",
786+
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
787+
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
788+
789+
# FULL2
790+
backup_id_4 = self.backup_node(backup_dir, 'node', node)
791+
792+
# PAGE3
793+
node.safe_psql(
794+
"postgres",
795+
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
796+
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
797+
798+
# Corrupt some file in FULL backup
799+
file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap)
800+
with open(file_full, "rb+", 0) as f:
801+
f.seek(84)
802+
f.write(b"blah")
803+
f.flush()
804+
f.close
805+
806+
# Validate Instance
807+
try:
808+
self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose'])
809+
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
810+
repr(self.output), self.cmd))
811+
except ProbackupException as e:
812+
self.assertTrue(
813+
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
814+
and "INFO: Validate backups of the instance 'node'" in e.message
815+
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
816+
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message,
817+
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
818+
819+
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
820+
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"')
821+
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
822+
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"')
823+
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"')
824+
825+
node.cleanup()
826+
restore_out = self.restore_node(
827+
backup_dir, 'node', node,
828+
options=["--no-validate"])
829+
self.assertIn(
830+
"INFO: Restore of backup {0} completed.".format(backup_id_5),
831+
restore_out,
832+
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
833+
repr(self.output), self.cmd))
834+
835+
# Clean after yourself
836+
self.del_test_dir(module_name, fname)
837+
751838
# @unittest.skip("skip")
752839
def test_validate_instance_with_corrupted_full(self):
753840
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
@@ -1582,3 +1669,62 @@ def test_validate_corrupted_full_1(self):
15821669

15831670
# Clean after yourself
15841671
self.del_test_dir(module_name, fname)
1672+
1673+
def test_file_size_corruption_no_validate(self):
1674+
1675+
fname = self.id().split('.')[3]
1676+
node = self.make_simple_node(
1677+
base_dir="{0}/{1}/node".format(module_name, fname),
1678+
# initdb_params=['--data-checksums'],
1679+
pg_options={'wal_level': 'replica'}
1680+
)
1681+
1682+
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
1683+
1684+
self.init_pb(backup_dir)
1685+
self.add_instance(backup_dir, 'node', node)
1686+
self.set_archiving(backup_dir, 'node', node)
1687+
1688+
node.start()
1689+
1690+
node.safe_psql(
1691+
"postgres",
1692+
"create table t_heap as select 1 as id, md5(i::text) as text, "
1693+
"md5(repeat(i::text,10))::tsvector as tsvector "
1694+
"from generate_series(0,1000) i")
1695+
node.safe_psql(
1696+
"postgres",
1697+
"CHECKPOINT;")
1698+
1699+
heap_path = node.safe_psql(
1700+
"postgres",
1701+
"select pg_relation_filepath('t_heap')").rstrip()
1702+
heap_size = node.safe_psql(
1703+
"postgres",
1704+
"select pg_relation_size('t_heap')")
1705+
1706+
backup_id = self.backup_node(
1707+
backup_dir, 'node', node, backup_type="full",
1708+
options=["-j", "4"], async=False, gdb=False)
1709+
1710+
node.stop()
1711+
node.cleanup()
1712+
1713+
# Let's corrupt the backup file by truncating it
1714+
with open(os.path.join(backup_dir, "backups", 'node', backup_id, "database", heap_path), "rb+", 0) as f:
1715+
f.truncate(int(heap_size) - 4096)
1716+
f.flush()
1717+
f.close
1718+
1719+
node.cleanup()
1720+
1721+
try:
1722+
self.restore_node(
1723+
backup_dir, 'node', node,
1724+
options=["--no-validate"])
1725+
except ProbackupException as e:
1726+
self.assertTrue("ERROR: Data files restoring failed" in e.message, repr(e.message))
1727+
print "\nExpected error: \n" + e.message
1728+
1729+
# Clean after yourself
1730+
self.del_test_dir(module_name, fname)

0 commit comments

Comments
 (0)