Skip to content

Commit 840fc75

Browse files
HDFS-17365. EC: Add extra redundancy configuration in checkStreamerFailures to prevent data loss. (#6517) Contributed by hfutatzhanghb.
Reviewed-by: Takanobu Asanuma <[email protected]> Signed-off-by: Shuyan Zhang <[email protected]>
1 parent 76ed292 commit 840fc75

File tree

4 files changed

+39
-5
lines changed

4 files changed

+39
-5
lines changed

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@
7373
import java.util.concurrent.LinkedBlockingQueue;
7474
import java.util.concurrent.TimeUnit;
7575

76+
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedundancy.DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED;
77+
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Write.ECRedundancy.DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED_DEFAILT;
78+
7679
/**
7780
* This class supports writing files in striped layout and erasure coded format.
7881
* Each stripe contains a sequence of cells.
@@ -283,6 +286,7 @@ private void flipDataBuffers() {
283286
private CompletionService<Void> flushAllExecutorCompletionService;
284287
private int blockGroupIndex;
285288
private long datanodeRestartTimeout;
289+
private final int failedBlocksTolerated;
286290

287291
/** Construct a new output stream for creating a file. */
288292
DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
@@ -322,6 +326,15 @@ private void flipDataBuffers() {
322326
currentPackets = new DFSPacket[streamers.size()];
323327
datanodeRestartTimeout = dfsClient.getConf().getDatanodeRestartTimeout();
324328
setCurrentStreamer(0);
329+
330+
int failedBlocksToleratedTmp = dfsClient.getConfiguration().getInt(
331+
DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED,
332+
DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED_DEFAILT);
333+
if (failedBlocksToleratedTmp < 0) {
334+
failedBlocksToleratedTmp = ecPolicy.getNumParityUnits();
335+
}
336+
failedBlocksTolerated = Math.min(failedBlocksToleratedTmp,
337+
ecPolicy.getNumParityUnits());
325338
}
326339

327340
/** Construct a new output stream for appending to a file. */
@@ -402,11 +415,11 @@ private Set<StripedDataStreamer> checkStreamers() throws IOException {
402415
LOG.debug("original failed streamers: {}", failedStreamers);
403416
LOG.debug("newly failed streamers: {}", newFailed);
404417
}
405-
if (failCount > (numAllBlocks - numDataBlocks)) {
418+
if (failCount > failedBlocksTolerated) {
406419
closeAllStreamers();
407420
throw new IOException("Failed: the number of failed blocks = "
408-
+ failCount + " > the number of parity blocks = "
409-
+ (numAllBlocks - numDataBlocks));
421+
+ failCount + " > the number of failed blocks tolerated = "
422+
+ failedBlocksTolerated);
410423
}
411424
return newFailed;
412425
}
@@ -687,7 +700,7 @@ private void checkStreamerFailures(boolean isNeedFlushAllPackets)
687700
// 2) create new block outputstream
688701
newFailed = waitCreatingStreamers(healthySet);
689702
if (newFailed.size() + failedStreamers.size() >
690-
numAllBlocks - numDataBlocks) {
703+
failedBlocksTolerated) {
691704
// The write has failed, Close all the streamers.
692705
closeAllStreamers();
693706
throw new IOException(

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,13 @@ interface ByteArrayManager {
427427
PREFIX + "count-reset-time-period-ms";
428428
long COUNT_RESET_TIME_PERIOD_MS_DEFAULT = 10 * MS_PER_SECOND;
429429
}
430+
431+
@SuppressWarnings("checkstyle:InterfaceIsType")
432+
interface ECRedundancy {
433+
String DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED =
434+
"dfs.client.ec.write.failed.blocks.tolerated";
435+
int DFS_CLIENT_EC_WRITE_FAILED_BLOCKS_TOLERATED_DEFAILT = -1;
436+
}
430437
}
431438

432439
/** dfs.client.block.write configuration properties */

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3923,6 +3923,19 @@
39233923
</description>
39243924
</property>
39253925

3926+
<property>
3927+
<name>dfs.client.ec.write.failed.blocks.tolerated</name>
3928+
<value>-1</value>
3929+
<description>
3930+
Provides an extra limit on the number of tolerated failed streamers for EC policies, to prevent
3931+
potential data loss. For example, suppose we use the RS-6-3-1024k EC policy.
3932+
We can still write successfully when there are 3 failed streamers. But if one of the six
3933+
data blocks is lost during reconstruction, we may lose the data forever.
3934+
It should be configured within [0, numParityBlocks]; the default value is -1, which
3935+
means the number of parity blocks of the EC policy being used.
3936+
</description>
3937+
</property>
3938+
39263939
<property>
39273940
<name>dfs.namenode.quota.init-threads</name>
39283941
<value>12</value>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestHdfsConfigFields.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ public void initializeMemberVariables() {
4747
HdfsClientConfigKeys.Read.class, HdfsClientConfigKeys.HedgedRead.class,
4848
HdfsClientConfigKeys.ShortCircuit.class,
4949
HdfsClientConfigKeys.Retry.class, HdfsClientConfigKeys.Mmap.class,
50-
HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.class };
50+
HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.class,
51+
HdfsClientConfigKeys.Write.ECRedundancy.class};
5152

5253
// Set error modes
5354
errorIfMissingConfigProps = true;

0 commit comments

Comments
 (0)