Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7174,6 +7174,11 @@ public synchronized void verifyToken(DelegationTokenIdentifier identifier,
public EditLogTailer getEditLogTailer() {
return editLogTailer;
}

/**
 * Test-only accessor that forwards to
 * {@code standbyCheckpointer.getLastCheckpointTime()}.
 *
 * NOTE(review): {@code standbyCheckpointer} is dereferenced without a null
 * check; confirm callers only invoke this while the checkpointer exists.
 *
 * @return the last checkpoint time reported by {@code standbyCheckpointer}.
 */
@VisibleForTesting
public long getStandbyLastCheckpointTime() {
return standbyCheckpointer.getLastCheckpointTime();
}

@VisibleForTesting
public void setEditLogTailerForTests(EditLogTailer tailer) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ void writeTransactionIdFile(StorageDirectory sd, long txid)
* @param time time of the last checkpoint, in millis since the epoch
*/
void setMostRecentCheckpointInfo(long txid, long time) {
LOG.info("setMostRecentCheckpointInfo txid is {}, time is {}", txid, time);
this.mostRecentCheckpointTxId = txid;
this.mostRecentCheckpointTime = time;
}
Expand All @@ -486,7 +487,7 @@ public long getMostRecentCheckpointTxId() {
/**
* @return the time of the most recent checkpoint in millis since the epoch.
*/
long getMostRecentCheckpointTime() {
public long getMostRecentCheckpointTime() {
return mostRecentCheckpointTime;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,11 @@ static int getCanceledCount() {
return canceledCount;
}

/**
 * Test-only accessor for the {@code lastCheckpointTime} field.
 *
 * NOTE(review): the time base of the value depends on how the field is
 * assigned after a checkpoint (it looks like a monotonicNow() reading
 * rather than epoch time) -- confirm before comparing it with wall-clock
 * timestamps.
 *
 * @return the current value of {@code lastCheckpointTime}.
 */
@VisibleForTesting
public long getLastCheckpointTime() {
return lastCheckpointTime;
}

private long countUncheckpointedTxns() {
FSImage img = namesystem.getFSImage();
return img.getCorrectLastAppliedOrWrittenTxId() -
Expand Down Expand Up @@ -461,7 +466,8 @@ private void doWork() {
} else if (secsSinceLast >= checkpointConf.getPeriod()) {
LOG.info("Triggering checkpoint because it has been {} seconds " +
"since the last checkpoint, which exceeds the configured " +
"interval {}", secsSinceLast, checkpointConf.getPeriod());
"interval {}, And now is {}, lastCheckpointTime is {}.",
secsSinceLast, checkpointConf.getPeriod(), now, lastCheckpointTime);
needCheckpoint = true;
}

Expand All @@ -487,8 +493,9 @@ private void doWork() {
namesystem.setCreatedRollbackImages(true);
namesystem.setNeedRollbackFsImage(false);
}
lastCheckpointTime = now;
LOG.info("Checkpoint finished successfully.");
lastCheckpointTime = monotonicNow();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch here. Thanks.

LOG.info("Checkpoint finished successfully, the lastCheckpointTime is:{}.",
lastCheckpointTime);
}
} catch (SaveNamespaceCancelledException ce) {
LOG.info("Checkpoint was cancelled: {}", ce.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,52 @@ public void testCheckpointSucceedsWithLegacyOIVException() throws Exception {
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
}

/**
 * Test that lastCheckpointTime is correctly updated at each checkpoint.
 *
 * <p>Two rounds of checkpoints are awaited. The last-checkpoint time read
 * from nns[1] (via getStandbyLastCheckpointTime) and from nns[0] (via
 * getLastCheckpointTime) is sampled after each round, and each reading must
 * advance by at least the configured checkpoint period.
 */
@Test(timeout = 300000)
public void testLastCheckpointTime() throws Exception {
  // Checkpoint period in seconds; the assertions below derive their minimum
  // expected gap from this same value so the two cannot drift apart.
  final int checkpointPeriodSecs = 3;
  final long minIntervalMs = checkpointPeriodSecs * 1000L;

  for (int i = 1; i < NUM_NNS; i++) {
    cluster.shutdownNameNode(i);

    // Use a short checkpoint period together with a high transaction
    // threshold, so the period (rather than the txn count) is expected to
    // be what triggers checkpoints in this test.
    cluster.getConfiguration(i).setInt(
        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, checkpointPeriodSecs);
    cluster.getConfiguration(i).setInt(
        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1000);
  }
  doEdits(0, 10);
  cluster.transitionToStandby(0);

  // Standby NNs do checkpoint without active NN available.
  for (int i = 1; i < NUM_NNS; i++) {
    cluster.restartNameNode(i, false);
  }
  cluster.waitClusterUp();
  setNNs();

  for (int i = 0; i < NUM_NNS; i++) {
    // Once the standby catches up, it should do a checkpoint
    // and save to local directories.
    HATestUtil.waitForCheckpoint(cluster, i, ImmutableList.of(12));
  }

  // First sample, taken after the initial round of checkpoints.
  long snnCheckpointTime1 = nns[1].getNamesystem().getStandbyLastCheckpointTime();
  long annCheckpointTime1 = nns[0].getNamesystem().getLastCheckpointTime();
  cluster.transitionToActive(0);
  cluster.transitionToObserver(2);

  doEdits(11, 20);
  nns[0].getRpcServer().rollEditLog();
  HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(23));

  // Second sample, taken after the next checkpoint has been observed.
  long snnCheckpointTime2 = nns[1].getNamesystem().getStandbyLastCheckpointTime();
  long annCheckpointTime2 = nns[0].getNamesystem().getLastCheckpointTime();

  // Both the standby's and the active's lastCheckpointTime must have advanced
  // by at least one checkpoint period (DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
  // converted to milliseconds). Each side is only compared against its own
  // earlier reading. The checks are separate so that a failure reports which
  // NameNode fell short and by how much.
  long snnIntervalMs = snnCheckpointTime2 - snnCheckpointTime1;
  long annIntervalMs = annCheckpointTime2 - annCheckpointTime1;
  assertTrue("Standby NN lastCheckpointTime advanced by " + snnIntervalMs
      + " ms, expected at least " + minIntervalMs + " ms",
      snnIntervalMs >= minIntervalMs);
  assertTrue("Active NN lastCheckpointTime advanced by " + annIntervalMs
      + " ms, expected at least " + minIntervalMs + " ms",
      annIntervalMs >= minIntervalMs);
}

private void doEdits(int start, int stop) throws IOException {
for (int i = start; i < stop; i++) {
Path p = new Path("/test" + i);
Expand Down