Skip to content

Commit b3d7c34

Browse files
authored
CNDB-9697: notify JVMStabilityInspector when encountered corruption exception on compaction task (#1901)
### What is the issue Corruption excetion from compaction task didn't notify error handler. Compaction aborted/failed metrics were not updated in some cases. ### What does this PR fix and why was it fixed Trigger JVMStabilityInspector for corruption exception from compaction task and update aborted/failed metircs properly. Trigger JVMStabilityInspecto for error from repair validation.
1 parent 0f50569 commit b3d7c34

14 files changed

+276
-35
lines changed

src/java/org/apache/cassandra/db/compaction/BackgroundCompactionRunner.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,10 @@
4747
import org.apache.cassandra.io.FSDiskFullWriteError;
4848
import org.apache.cassandra.io.FSError;
4949
import org.apache.cassandra.io.FSWriteError;
50+
import org.apache.cassandra.io.compress.CorruptBlockException;
51+
import org.apache.cassandra.io.sstable.CorruptSSTableException;
5052
import org.apache.cassandra.io.sstable.format.SSTableReader;
53+
import org.apache.cassandra.io.util.CorruptFileException;
5154
import org.apache.cassandra.utils.FBUtilities;
5255
import org.apache.cassandra.utils.JVMStabilityInspector;
5356
import org.apache.cassandra.utils.Throwables;
@@ -460,6 +463,7 @@ public static void handleCompactionError(Throwable t, ColumnFamilyStore cfs)
460463
// we might have to rely on error message parsing...
461464
t = t instanceof FSError ? t : new FSWriteError(t);
462465
JVMStabilityInspector.inspectThrowable(t);
466+
CompactionManager.instance.incrementFailed();
463467
}
464468
// No-Space-Left IO exception is thrown by JDK when disk has reached its capacity. The key difference between this
465469
// and the earlier case with `FSDiskFullWriteError` is that here we have definitively run out of disk space, and
@@ -471,15 +475,26 @@ else if (Throwables.isCausedBy(t, IOException.class) && t.toString().contains(NO
471475
// wrap it with FSWriteError so that JVMStabilityInspector can properly stop or die
472476
t = t instanceof FSError ? t : new FSWriteError(t);
473477
JVMStabilityInspector.inspectThrowable(t);
478+
CompactionManager.instance.incrementFailed();
474479
}
475480
else if (Throwables.isCausedBy(t, OutOfMemoryError.class))
476481
{
477482
logger.error("Encountered out of memory error on {}", cfs, t);
478483
JVMStabilityInspector.inspectThrowable(t);
484+
CompactionManager.instance.incrementFailed();
485+
}
486+
else if (Throwables.anyCauseMatches(t, err -> err instanceof CorruptBlockException
487+
|| err instanceof CorruptFileException
488+
|| err instanceof CorruptSSTableException))
489+
{
490+
logger.error("Encountered corruption exception on {}", cfs, t);
491+
JVMStabilityInspector.inspectThrowable(t);
492+
CompactionManager.instance.incrementFailed();
479493
}
480494
else if (t instanceof CompactionInterruptedException)
481495
{
482496
logger.warn(String.format("Aborting background compaction of %s due to interruption", cfs), Throwables.unwrapped(t));
497+
CompactionManager.instance.incrementAborted();
483498
}
484499
else
485500
{

src/java/org/apache/cassandra/io/FSDiskFullWriteError.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,20 @@
2222

2323
public class FSDiskFullWriteError extends FSWriteError
2424
{
25+
private final String keyspace;
26+
2527
public FSDiskFullWriteError(String keyspace, long mutationSize)
2628
{
2729
super(new IOException(String.format("Insufficient disk space to write %d bytes into the %s keyspace",
2830
mutationSize,
2931
keyspace)));
32+
33+
this.keyspace = keyspace;
34+
}
35+
36+
public String getKeyspace()
37+
{
38+
return keyspace;
3039
}
3140

3241
@Override

src/java/org/apache/cassandra/io/FSNoDiskAvailableForWriteError.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,19 @@
2525
*/
2626
public class FSNoDiskAvailableForWriteError extends FSWriteError
2727
{
28+
private final String keyspace;
29+
2830
public FSNoDiskAvailableForWriteError(String keyspace)
2931
{
3032
super(new IOException(String.format("The data directories for the %s keyspace have been marked as unwritable",
3133
keyspace)));
34+
35+
this.keyspace = keyspace;
36+
}
37+
38+
public String getKeyspace()
39+
{
40+
return keyspace;
3241
}
3342

3443
@Override

src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ private synchronized void doResetAndTruncate(DataPosition mark)
300300
}
301301
catch (IOException e)
302302
{
303-
throw new CorruptBlockException(getFile().toString(), chunkOffset, chunkSize, e);
303+
throw new CorruptBlockException(getFile(), chunkOffset, chunkSize, e);
304304
}
305305

306306
CRC32 checksum = new CRC32();
@@ -313,15 +313,15 @@ private synchronized void doResetAndTruncate(DataPosition mark)
313313
int storedChecksum = crcCheckBuffer.getInt();
314314
int computedChecksum = (int) checksum.getValue();
315315
if (storedChecksum != computedChecksum)
316-
throw new CorruptBlockException(getFile().toString(), chunkOffset, chunkSize, storedChecksum, computedChecksum);
316+
throw new CorruptBlockException(getFile(), chunkOffset, chunkSize, storedChecksum, computedChecksum);
317317
}
318318
catch (CorruptBlockException e)
319319
{
320320
throw new CorruptSSTableException(e, getFile());
321321
}
322322
catch (EOFException e)
323323
{
324-
throw new CorruptSSTableException(new CorruptBlockException(getFile().toString(), chunkOffset, chunkSize), getFile());
324+
throw new CorruptSSTableException(new CorruptBlockException(getFile(), chunkOffset, chunkSize), getFile());
325325
}
326326
catch (IOException e)
327327
{

src/java/org/apache/cassandra/io/compress/CorruptBlockException.java

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,37 +19,47 @@
1919

2020
import java.io.IOException;
2121

22+
import org.apache.cassandra.io.util.File;
23+
2224
public class CorruptBlockException extends IOException
2325
{
24-
public CorruptBlockException(String filePath, CompressionMetadata.Chunk chunk)
26+
private final File file;
27+
28+
public CorruptBlockException(File file, CompressionMetadata.Chunk chunk)
2529
{
26-
this(filePath, chunk, null);
30+
this(file, chunk, null);
2731
}
2832

29-
public CorruptBlockException(String filePath, CompressionMetadata.Chunk chunk, Throwable cause)
33+
public CorruptBlockException(File file, CompressionMetadata.Chunk chunk, Throwable cause)
3034
{
31-
this(filePath, chunk.offset, chunk.length, cause);
35+
this(file, chunk.offset, chunk.length, cause);
3236
}
3337

34-
public CorruptBlockException(String filePath, long offset, int length)
38+
public CorruptBlockException(File file, long offset, int length)
3539
{
36-
this(filePath, offset, length, null);
40+
this(file, offset, length, null);
3741
}
3842

39-
public CorruptBlockException(String filePath, long offset, int length, Throwable cause)
43+
public CorruptBlockException(File file, long offset, int length, Throwable cause)
4044
{
41-
super(String.format("(%s): corruption detected, chunk at %d of length %d.", filePath, offset, length), cause);
45+
super(String.format("(%s): corruption detected, chunk at %d of length %d.", file.toString(), offset, length), cause);
46+
this.file = file;
4247
}
4348

44-
public CorruptBlockException(String filePath, CompressionMetadata.Chunk chunk, int storedChecksum, int calculatedChecksum)
49+
public CorruptBlockException(File file, CompressionMetadata.Chunk chunk, int storedChecksum, int calculatedChecksum)
4550
{
46-
this(filePath, chunk.offset, chunk.length, storedChecksum, calculatedChecksum);
51+
this(file, chunk.offset, chunk.length, storedChecksum, calculatedChecksum);
4752
}
4853

49-
public CorruptBlockException(String filePath, long offset, int length, int storedChecksum, int calculatedChecksum)
54+
public CorruptBlockException(File file, long offset, int length, int storedChecksum, int calculatedChecksum)
5055
{
5156
super(String.format("(%s): corruption detected, chunk at %d of length %d has mismatched checksums. Expected %d, but calculated %d",
52-
filePath, offset, length, storedChecksum, calculatedChecksum));
57+
file.toString(), offset, length, storedChecksum, calculatedChecksum));
58+
this.file = file;
5359
}
5460

61+
public File getFile()
62+
{
63+
return file;
64+
}
5565
}

src/java/org/apache/cassandra/io/compress/EncryptedSequentialWriter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public synchronized void resetAndTruncate(DataPosition mark)
221221
encrypted.limit(CHUNK_SIZE);
222222

223223
if (encrypted.getInt(CHUNK_SIZE - 4) != (int) checksum.getValue())
224-
throw new CorruptBlockException(getFile().toString(), truncateChunk, CHUNK_SIZE);
224+
throw new CorruptBlockException(getFile(), truncateChunk, CHUNK_SIZE);
225225

226226
try
227227
{
@@ -233,7 +233,7 @@ public synchronized void resetAndTruncate(DataPosition mark)
233233
}
234234
catch (IOException e)
235235
{
236-
throw new CorruptBlockException(getFile().toString(), truncateChunk, CHUNK_SIZE, e);
236+
throw new CorruptBlockException(getFile(), truncateChunk, CHUNK_SIZE, e);
237237
}
238238
}
239239
catch (CorruptBlockException e)
@@ -242,7 +242,7 @@ public synchronized void resetAndTruncate(DataPosition mark)
242242
}
243243
catch (EOFException e)
244244
{
245-
throw new CorruptSSTableException(new CorruptBlockException(getFile().toString(), truncateChunk, CHUNK_SIZE), getFile());
245+
throw new CorruptSSTableException(new CorruptBlockException(getFile(), truncateChunk, CHUNK_SIZE), getFile());
246246
}
247247
catch (IOException e)
248248
{

src/java/org/apache/cassandra/io/sstable/CorruptSSTableException.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.nio.file.Path;
2121

2222
import org.apache.cassandra.io.util.File;
23+
import org.apache.cassandra.io.util.PathUtils;
2324
import org.apache.cassandra.utils.DseLegacy;
2425

2526
public class CorruptSSTableException extends RuntimeException
@@ -34,7 +35,7 @@ public CorruptSSTableException(Throwable cause, File file)
3435

3536
public CorruptSSTableException(Throwable cause, String path)
3637
{
37-
this(cause, new File(path));
38+
this(cause, new File(PathUtils.getPath(path)));
3839
}
3940

4041
protected CorruptSSTableException(String msg, Throwable cause, File file)
@@ -48,5 +49,4 @@ public CorruptSSTableException(Throwable cause, Path path)
4849
{
4950
this(cause, new File(path));
5051
}
51-
5252
}

src/java/org/apache/cassandra/io/util/ChecksummedRebufferer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public BufferHolder rebuffer(long desiredPosition)
5050
}
5151
catch (IOException e)
5252
{
53-
throw new CorruptFileException(e, channel().filePath());
53+
throw new CorruptFileException(e, channel().getFile());
5454
}
5555

5656
return this;

src/java/org/apache/cassandra/io/util/CompressedChunkReader.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
140140
{
141141
compressed.limit(length);
142142
if (channel.read(compressed, chunkOffset) != length)
143-
throw new CorruptBlockException(channel.filePath(), chunk);
143+
throw new CorruptBlockException(channel.getFile(), chunk);
144144

145145
if (shouldCheckCrc)
146146
{
@@ -151,7 +151,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
151151
compressed.limit(length);
152152
int storedChecksum = compressed.getInt();
153153
if (storedChecksum != checksum)
154-
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);
154+
throw new CorruptBlockException(channel.getFile(), chunk, storedChecksum, checksum);
155155
}
156156

157157
compressed.position(0).limit(chunk.length);
@@ -171,7 +171,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
171171
}
172172
catch (IOException e)
173173
{
174-
throw new CorruptBlockException(channel.filePath(), chunk, e);
174+
throw new CorruptBlockException(channel.getFile(), chunk, e);
175175
}
176176
}
177177
finally
@@ -183,7 +183,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
183183
{
184184
uncompressed.position(0).limit(chunk.length);
185185
if (channel.read(uncompressed, chunkOffset) != chunk.length)
186-
throw new CorruptBlockException(channel.filePath(), chunk);
186+
throw new CorruptBlockException(channel.getFile(), chunk);
187187
}
188188
uncompressed.flip();
189189
}
@@ -239,7 +239,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
239239
compressedChunk.limit(compressedChunk.capacity());
240240
int storedChecksum = compressedChunk.getInt();
241241
if (storedChecksum != checksum)
242-
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);
242+
throw new CorruptBlockException(channel.getFile(), chunk, storedChecksum, checksum);
243243
}
244244

245245
compressedChunk.position(chunkOffsetInSegment).limit(chunkOffsetInSegment + chunk.length);
@@ -257,7 +257,7 @@ public void readChunk(long position, ByteBuffer uncompressed)
257257
}
258258
catch (IOException e)
259259
{
260-
throw new CorruptBlockException(channel.filePath(), chunk, e);
260+
throw new CorruptBlockException(channel.getFile(), chunk, e);
261261
}
262262
uncompressed.flip();
263263
}

src/java/org/apache/cassandra/io/util/CorruptFileException.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121
@SuppressWarnings("serial")
2222
public class CorruptFileException extends RuntimeException
2323
{
24-
public final String filePath;
24+
public final File file;
2525

26-
public CorruptFileException(Exception cause, String filePath)
26+
public CorruptFileException(Exception cause, File file)
2727
{
2828
super(cause);
29-
this.filePath = filePath;
29+
this.file = file;
30+
}
31+
32+
public File getFile()
33+
{
34+
return file;
3035
}
3136
}

0 commit comments

Comments
 (0)