Skip to content

Commit 635dfb7

Browse files
committed
Merge branch 'zoran/CON-1035-divergence-check' into 'master'
fix(ic-backup, CON-1035) Improve divergence check in order to reduce flakiness. The assertion will not fail the test while a second process is still running, hence it is moved to the end. Added some debugging for the occasional failure to detect divergence. See merge request dfinity-lab/public/ic!12514
2 parents 92e6bc8 + d87bc22 commit 635dfb7

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

rs/backup/src/backup_helper.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,10 @@ impl BackupHelper {
504504
"{}_{:010}_{:012}.log",
505505
self.subnet_id, timestamp, start_height
506506
);
507-
let mut file = File::create(self.logs_dir().join(log_file_name))
508-
.map_err(|err| format!("Error creating log file: {:?}", err))?;
507+
let file_name = self.logs_dir().join(log_file_name);
508+
debug!(self.log, "Write replay log to: {:?}", file_name);
509+
let mut file =
510+
File::create(file_name).map_err(|err| format!("Error creating log file: {:?}", err))?;
509511
file.write_all(stdout.as_bytes())
510512
.map_err(|err| format!("Error writing log file: {:?}", err))?;
511513
Ok(())

rs/tests/src/orchestrator/backup_manager.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,10 @@ pub fn test(env: TestEnv) {
207207
info!(log, "Start the backup process in a separate thread");
208208
let ic_backup_path = binaries_path.join("ic-backup");
209209
let mut command = Command::new(&ic_backup_path);
210-
command.arg("--config-file").arg(&config_file);
210+
command
211+
.arg("--config-file")
212+
.arg(&config_file)
213+
.arg("--debug");
211214
info!(log, "Will execute: {:?}", command);
212215

213216
let mut child = command
@@ -299,8 +302,8 @@ pub fn test(env: TestEnv) {
299302
.join(canister_id_hex)
300303
.join("vmemory_0.bin");
301304
assert!(memory_artifact_path.exists());
305+
info!(log, "Modify memory file: {:?}", memory_artifact_path);
302306
modify_byte_in_file(memory_artifact_path).expect("Modifying a byte failed");
303-
info!(log, "Modified memory file");
304307

305308
let mut command = Command::new(&ic_backup_path);
306309
command
@@ -321,14 +324,18 @@ pub fn test(env: TestEnv) {
321324
info!(log, "Artifacts and states are moved to cold storage");
322325

323326
let mut hash_mismatch = false;
324-
for _ in 0..13 {
327+
for i in 0..60 {
325328
info!(log, "Checking logs for hash mismatch...");
326329
if let Ok(dirs) = fs::read_dir(backup_dir.join("logs")) {
327330
for en in dirs {
331+
info!(log, "DirEntry in logs: {:?}", en);
328332
match en {
329333
Ok(d) => {
330334
let contents = fs::read_to_string(d.path())
331335
.expect("Should have been able to read the log file");
336+
if i == 15 {
337+
println!("{}", contents);
338+
}
332339

333340
if contents.contains(DIVERGENCE_LOG_STR) {
334341
hash_mismatch = true;
@@ -346,11 +353,12 @@ pub fn test(env: TestEnv) {
346353
}
347354
sleep_secs(10);
348355
}
349-
assert!(hash_mismatch);
350-
info!(log, "There was a divergence of the state");
351356

352357
info!(log, "Kill child process");
353358
child.kill().expect("Error killing backup process");
359+
360+
assert!(hash_mismatch);
361+
info!(log, "There was a divergence of the state");
354362
}
355363

356364
fn some_checkpoint_dir(backup_dir: &Path, subnet_id: &SubnetId) -> Option<PathBuf> {

0 commit comments

Comments
 (0)