Skip to content

Commit 93e85cb

Browse files
author
Aleksandr Parfenov
committed
Add --no-validate option for restore command
1 parent 6d709fb commit 93e85cb

File tree

4 files changed

+153
-52
lines changed

4 files changed

+153
-52
lines changed

src/pg_probackup.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ static char *target_action = NULL;;
6969
static pgRecoveryTarget *recovery_target_options = NULL;
7070

7171
bool restore_as_replica = false;
72+
bool restore_no_validate = false;
7273

7374
/* delete options */
7475
bool delete_wal = false;
@@ -143,6 +144,7 @@ static pgut_option options[] =
143144
{ 's', 25, "recovery-target-name", &target_name, SOURCE_CMDLINE },
144145
{ 's', 26, "recovery-target-action", &target_action, SOURCE_CMDLINE },
145146
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE },
147+
{ 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE },
146148
/* delete options */
147149
{ 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE },
148150
{ 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE },
@@ -431,7 +433,7 @@ main(int argc, char *argv[])
431433
/* parse all recovery target options into recovery_target_options structure */
432434
recovery_target_options = parseRecoveryTargetOptions(target_time, target_xid,
433435
target_inclusive, target_tli, target_immediate,
434-
target_name, target_action);
436+
target_name, target_action, restore_no_validate);
435437
}
436438

437439
if (num_threads < 1)

src/pg_probackup.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ typedef struct pgRecoveryTarget
247247
bool recovery_target_immediate;
248248
const char *recovery_target_name;
249249
const char *recovery_target_action;
250+
bool restore_no_validate;
250251
} pgRecoveryTarget;
251252

252253
/* Union to ease operations on relation pages */
@@ -378,7 +379,7 @@ extern parray * readTimeLineHistory_probackup(TimeLineID targetTLI);
378379
extern pgRecoveryTarget *parseRecoveryTargetOptions(
379380
const char *target_time, const char *target_xid,
380381
const char *target_inclusive, TimeLineID target_tli, bool target_immediate,
381-
const char *target_name, const char *target_action);
382+
const char *target_name, const char *target_action, bool restore_no_validate);
382383

383384
extern void opt_tablespace_map(pgut_option *opt, const char *arg);
384385

src/restore.c

Lines changed: 61 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -243,66 +243,69 @@ do_restore_or_validate(time_t target_backup_id,
243243
if (is_restore)
244244
check_tablespace_mapping(dest_backup);
245245

246-
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
247-
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
248-
249-
/*
250-
* Validate backups from base_full_backup to dest_backup.
251-
*/
252-
for (i = base_full_backup_index; i >= dest_backup_index; i--)
246+
if (!is_restore || !rt->restore_no_validate)
253247
{
254-
pgBackup *backup = (pgBackup *) parray_get(backups, i);
255-
pgBackupValidate(backup);
256-
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
257-
if (backup->status == BACKUP_STATUS_CORRUPT)
258-
{
259-
corrupted_backup = backup;
260-
corrupted_backup_index = i;
261-
break;
262-
}
263-
/* We do not validate WAL files of intermediate backups
264-
* It's done to speed up restore
265-
*/
266-
}
267-
/* There is no point in wal validation
268-
* if there is corrupted backup between base_backup and dest_backup
269-
*/
270-
if (!corrupted_backup)
248+
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
249+
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
250+
271251
/*
272-
* Validate corresponding WAL files.
273-
* We pass base_full_backup timeline as last argument to this function,
274-
* because it's needed to form the name of xlog file.
252+
* Validate backups from base_full_backup to dest_backup.
275253
*/
276-
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
277-
rt->recovery_target_xid, base_full_backup->tli);
278-
279-
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
280-
if (corrupted_backup)
281-
{
282-
for (i = corrupted_backup_index - 1; i >= 0; i--)
254+
for (i = base_full_backup_index; i >= dest_backup_index; i--)
283255
{
284256
pgBackup *backup = (pgBackup *) parray_get(backups, i);
285-
/* Mark incremental OK backup as orphan */
286-
if (backup->backup_mode == BACKUP_MODE_FULL)
257+
pgBackupValidate(backup);
258+
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
259+
if (backup->status == BACKUP_STATUS_CORRUPT)
260+
{
261+
corrupted_backup = backup;
262+
corrupted_backup_index = i;
287263
break;
288-
if (backup->status != BACKUP_STATUS_OK)
289-
continue;
290-
else
264+
}
265+
/* We do not validate WAL files of intermediate backups
266+
* It's done to speed up restore
267+
*/
268+
}
269+
/* There is no point in wal validation
270+
* if there is corrupted backup between base_backup and dest_backup
271+
*/
272+
if (!corrupted_backup)
273+
/*
274+
* Validate corresponding WAL files.
275+
* We pass base_full_backup timeline as last argument to this function,
276+
* because it's needed to form the name of xlog file.
277+
*/
278+
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
279+
rt->recovery_target_xid, base_full_backup->tli);
280+
281+
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
282+
if (corrupted_backup)
283+
{
284+
for (i = corrupted_backup_index - 1; i >= 0; i--)
291285
{
292-
char *backup_id,
293-
*corrupted_backup_id;
286+
pgBackup *backup = (pgBackup *) parray_get(backups, i);
287+
/* Mark incremental OK backup as orphan */
288+
if (backup->backup_mode == BACKUP_MODE_FULL)
289+
break;
290+
if (backup->status != BACKUP_STATUS_OK)
291+
continue;
292+
else
293+
{
294+
char *backup_id,
295+
*corrupted_backup_id;
294296

295-
backup->status = BACKUP_STATUS_ORPHAN;
296-
pgBackupWriteBackupControlFile(backup);
297+
backup->status = BACKUP_STATUS_ORPHAN;
298+
pgBackupWriteBackupControlFile(backup);
297299

298-
backup_id = base36enc_dup(backup->start_time);
299-
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
300+
backup_id = base36enc_dup(backup->start_time);
301+
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
300302

301-
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
302-
backup_id, corrupted_backup_id);
303+
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
304+
backup_id, corrupted_backup_id);
303305

304-
free(backup_id);
305-
free(corrupted_backup_id);
306+
free(backup_id);
307+
free(corrupted_backup_id);
308+
}
306309
}
307310
}
308311
}
@@ -1001,7 +1004,8 @@ parseRecoveryTargetOptions(const char *target_time,
10011004
TimeLineID target_tli,
10021005
bool target_immediate,
10031006
const char *target_name,
1004-
const char *target_action)
1007+
const char *target_action,
1008+
bool restore_no_validate)
10051009
{
10061010
time_t dummy_time;
10071011
TransactionId dummy_xid;
@@ -1026,6 +1030,7 @@ parseRecoveryTargetOptions(const char *target_time,
10261030
rt->recovery_target_immediate = false;
10271031
rt->recovery_target_name = NULL;
10281032
rt->recovery_target_action = NULL;
1033+
rt->restore_no_validate = false;
10291034

10301035
/* parse given options */
10311036
if (target_time)
@@ -1072,6 +1077,12 @@ parseRecoveryTargetOptions(const char *target_time,
10721077
rt->recovery_target_immediate = target_immediate;
10731078
}
10741079

1080+
if (restore_no_validate)
1081+
{
1082+
recovery_target_specified++;
1083+
rt->restore_no_validate = restore_no_validate;
1084+
}
1085+
10751086
if (target_name)
10761087
{
10771088
recovery_target_specified++;

tests/validate_test.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,93 @@ def test_validate_instance_with_corrupted_page(self):
748748
# Clean after yourself
749749
self.del_test_dir(module_name, fname)
750750

751+
# @unittest.skip("skip")
752+
def test_validate_instance_with_corrupted_full_and_try_restore(self):
    """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
    corrupt file in FULL backup and run validate on instance,
    expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN,
    try to restore backup with --no-validate option"""
    fname = self.id().split('.')[3]
    node = self.make_simple_node(
        base_dir="{0}/{1}/node".format(module_name, fname),
        initdb_params=['--data-checksums'],
        pg_options={'wal_level': 'replica'}
    )
    backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
    self.init_pb(backup_dir)
    self.add_instance(backup_dir, 'node', node)
    self.set_archiving(backup_dir, 'node', node)
    node.start()

    node.safe_psql(
        "postgres",
        "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
    # Remember where the table lives on disk so the copy inside the
    # FULL backup can be corrupted later.
    file_path_t_heap = node.safe_psql(
        "postgres",
        "select pg_relation_filepath('t_heap')").rstrip()
    # FULL1
    backup_id_1 = self.backup_node(backup_dir, 'node', node)

    node.safe_psql(
        "postgres",
        "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
    # PAGE1
    backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')

    # PAGE2
    node.safe_psql(
        "postgres",
        "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
    backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')

    # FULL2
    backup_id_4 = self.backup_node(backup_dir, 'node', node)

    # PAGE3
    node.safe_psql(
        "postgres",
        "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
    backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')

    # Corrupt some file in FULL backup
    file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap)
    # The with-statement closes the file on exit, so no explicit close
    # is needed (the original had a bare `f.close` that never ran).
    with open(file_full, "rb+", 0) as f:
        f.seek(84)
        f.write(b"blah")
        f.flush()

    # Validate Instance
    try:
        self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose'])
        # Reaching this line means validate did not raise: force a failure.
        self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
            repr(self.output), self.cmd))
    except ProbackupException as e:
        self.assertTrue(
            'INFO: Validating backup {0}'.format(backup_id_1) in e.message
            and "INFO: Validate backups of the instance 'node'" in e.message
            and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
            and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message,
            '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))

    # The first chain (FULL1 <- PAGE1 <- PAGE2) is unusable; the second
    # chain (FULL2 <- PAGE3) must be unaffected.
    self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
    self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"')
    self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
    self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"')
    self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"')

    # Restore with --no-validate and check that the restore of PAGE3
    # is reported as completed.
    node.cleanup()
    restore_out = self.restore_node(
        backup_dir, 'node', node,
        options=["--no-validate"])
    self.assertIn(
        "INFO: Restore of backup {0} completed.".format(backup_id_5),
        restore_out,
        '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
            repr(self.output), self.cmd))

    # Clean after yourself
    self.del_test_dir(module_name, fname)
837+
751838
# @unittest.skip("skip")
752839
def test_validate_instance_with_corrupted_full(self):
753840
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,

0 commit comments

Comments
 (0)