Skip to content

Commit 0a5a124

Browse files
committed
MmapLongIdIndex: improve consistency check, add paranoia mode
1 parent 1e49f3d commit 0a5a124

File tree

1 file changed

+238
-40
lines changed

1 file changed

+238
-40
lines changed

dump/src/util/dump/MmapLongIdIndex.java

Lines changed: 238 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@
2828

2929
import gnu.trove.list.TLongList;
3030
import gnu.trove.list.array.TLongArrayList;
31+
import gnu.trove.map.TLongLongMap;
32+
import gnu.trove.map.hash.TLongLongHashMap;
3133
import jdk.incubator.foreign.GroupLayout;
3234
import jdk.incubator.foreign.MemoryLayout;
3335
import jdk.incubator.foreign.MemoryLayout.PathElement;
3436
import jdk.incubator.foreign.MemorySegment;
3537
import jdk.incubator.foreign.ResourceScope;
38+
import util.dump.cache.LRUCache;
3639
import util.dump.reflection.FieldAccessor;
3740
import util.dump.reflection.FieldFieldAccessor;
3841
import util.dump.reflection.Reflection;
@@ -50,6 +53,8 @@ public abstract class MmapLongIdIndex<E> extends DumpIndex<E> implements UniqueC
5053

5154
private static final Logger _log = LoggerFactory.getLogger(MmapLongIdIndex.class);
5255

56+
private static final boolean PARANOIA_MODE = true;
57+
5358
private static final VarHandle LONG_ARRAY_ACCESS = sequenceLayout(JAVA_LONG).varHandle(long.class, sequenceElement());
5459

5560
public static <E> MmapLongIdIndex<E> forClosedRange( Dump<E> dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException {
@@ -95,7 +100,7 @@ private static void longArraySetVolatile( MemorySegment array, long index, long
95100
protected final long _minKey;
96101
protected final long _maxKey;
97102

98-
private final HeaderCorrections _headerCorrections = new HeaderCorrections();
103+
private final IndexCorrections _indexCorrections = new IndexCorrections();
99104

100105
protected FileLayout _fileLayout;
101106
private Header _header;
@@ -277,9 +282,13 @@ protected boolean checkMeta() {
277282
}
278283

279284
protected boolean checkNumKeys( Header header ) {
280-
if ( header.getNumKeys() <= 0 ) {
285+
if ( header.getNumKeys() < 0 ) {
286+
return false;
287+
}
288+
if ( header.getNumKeys() == 0 && _dump.getDumpSize() > 0 ) {
281289
return false;
282290
}
291+
283292
return header.getNumKeys() <= header.getTableBytes() / Long.BYTES;
284293
}
285294

@@ -399,46 +408,28 @@ private boolean add0( E elem, long pos ) {
399408
}
400409

401410
private void applyHeaderCorrections() {
402-
if ( _headerCorrections.numKeys != null ) {
403-
_header.setNumKeys(_headerCorrections.numKeys);
411+
if ( _indexCorrections.numKeys != null ) {
412+
_log.info("{} fixing header...", _lookupPath.getFileName());
413+
_header.setNumKeys(_indexCorrections.numKeys);
404414
}
405415
}
406416

407-
private boolean checkConsistency( Header header, MemorySegment tableSegment ) {
408-
boolean consistent = true;
409-
long start = System.nanoTime();
417+
private void applyTableCorrections() {
418+
if ( _indexCorrections.rawContentCorrections != null ) {
419+
_log.info("{} fixing table...", _lookupPath.getFileName());
420+
_indexCorrections.rawContentCorrections.forEachEntry(( index, posInfo ) -> {
421+
longArraySet(_tableSegment, index, posInfo);
422+
return true;
423+
});
410424

411-
long dumpSize = _dump._outputStream._n;
412-
long numKeys = 0;
413-
for ( long i = 0, n = capacity(tableSegment); i < n; ++i ) {
414-
// not used during live operation, hence concurrency is not an issue
415-
long pos = getPosAt(tableSegment, i);
416-
if ( pos >= 0 ) {
417-
if ( _dump._deletedPositions.contains(pos) ) {
418-
_log.warn("{} Found deleted position! Will rebuild index.", _lookupPath.getFileName());
419-
consistent = false; // TODO: probably just an incomplete write during deletion, add offending positions to header corrections for later removal
420-
}
421-
if ( pos >= dumpSize ) {
422-
_log.warn("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index.", _lookupPath.getFileName(), pos, dumpSize);
423-
consistent = false;
424-
}
425-
426-
++numKeys;
425+
if ( _header.getNumKeys() != getAllLongKeys().length ) {
426+
throw new IllegalStateException(_lookupPath.getFileName() + " inconsistencies post-fixup, wtf");
427427
}
428428
}
429+
}
429430

430-
if ( numKeys != header.getNumKeys() ) {
431-
_log.info("{} numKeys differ between actual table ({}) and caching header ({}), fixing.", _lookupPath.getFileName(), numKeys, header.getNumKeys());
432-
_headerCorrections.numKeys = numKeys;
433-
}
434-
435-
Duration duration = Duration.ofNanos(System.nanoTime() - start);
436-
if ( consistent ) {
437-
_log.info("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "consistent", duration);
438-
} else {
439-
_log.warn("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "inconsistent", duration);
440-
}
441-
return consistent;
431+
private boolean checkConsistency( Header header, MemorySegment tableSegment ) throws IOException {
432+
return new ConsistencyCheck(header, tableSegment).perform();
442433
}
443434

444435
private boolean checkHeader() {
@@ -500,7 +491,7 @@ private boolean checkHeader() {
500491
}
501492

502493
// not closed properly, or inconsistency between dump file and header state
503-
if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() || header.getNumKeys() == 0 && _dump.getDumpSize() > 0 ) {
494+
if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() ) {
504495
_log.info("{} was not closed properly, checking consistency...", _lookupPath.getFileName());
505496
checkRequired = true;
506497
}
@@ -511,6 +502,11 @@ private boolean checkHeader() {
511502
checkRequired = true;
512503
}
513504

505+
if ( PARANOIA_MODE && !checkRequired ) {
506+
_log.info("{} hardcoded paranoia mode enabled, checking consistency...", _lookupPath.getFileName());
507+
checkRequired = true;
508+
}
509+
514510
if ( checkRequired ) {
515511
MemorySegment tableSegment = MemorySegment.mapFile(_lookupPath, header.getTableOffset(), header.getTableBytes(), READ_ONLY, autoClosedScope);
516512
if ( !checkConsistency(header, tableSegment) ) {
@@ -522,8 +518,8 @@ private boolean checkHeader() {
522518

523519
return true;
524520
}
525-
catch ( Exception e ) {
526-
throw new RuntimeException(e);
521+
catch ( Exception argh ) {
522+
throw new RuntimeException(argh);
527523
}
528524

529525
}
@@ -647,11 +643,14 @@ private void openExisting() throws IOException {
647643
openHeader(mapHeaderSegment());
648644

649645
mapTableSegment();
646+
647+
applyTableCorrections();
650648
}
651649

652650
private void openHeader( MemorySegment headerSegment ) {
653651
_header = new Header(_fileLayout, headerSegment);
654652
_header.setOpenedTimestamp(System.currentTimeMillis());
653+
655654
applyHeaderCorrections();
656655
}
657656

@@ -941,7 +940,8 @@ static final class OpenRangeMmapLongIdIndex<E> extends MmapLongIdIndex<E> {
941940

942941
private static long deriveMaxKeyFrom( long minKey ) {
943942
// this basically rotates the zero-index of the array, just like simple index addition/subtraction does during offset compensation
944-
return Long.MAX_VALUE + minKey;
943+
// return Long.MAX_VALUE + minKey; // this totally screws up the < comparison
944+
return minKey < 0 ? Long.MAX_VALUE + minKey : Long.MAX_VALUE;
945945
}
946946

947947
private final Object _growLock = new Object();
@@ -983,10 +983,18 @@ private void ensureTableContains( long index ) {
983983
}
984984

985985

986-
private static final class HeaderCorrections {
986+
private static final class IndexCorrections {
987987

988988
Long numKeys;
989989

990+
TLongLongMap rawContentCorrections; // no key or pos offsets here, just write values to array indexes verbatim
991+
992+
TLongLongMap rawContentCorrections() {
993+
if ( rawContentCorrections == null ) {
994+
rawContentCorrections = new TLongLongHashMap();
995+
}
996+
return rawContentCorrections;
997+
}
990998
}
991999

9921000

@@ -1032,4 +1040,194 @@ public long getLayoutVersion() {
10321040
}
10331041

10341042
}
1043+
1044+
1045+
private final class ConsistencyCheck {
1046+
1047+
private final Header _header;
1048+
private final MemorySegment _tableSegment;
1049+
private final long _segmentCapacity;
1050+
1051+
private boolean _consistent = true;
1052+
1053+
private long _numKeysInIndex;
1054+
1055+
public ConsistencyCheck( Header header, MemorySegment tableSegment ) {
1056+
_header = header;
1057+
_tableSegment = tableSegment;
1058+
_segmentCapacity = capacity(_tableSegment);
1059+
}
1060+
1061+
public boolean perform() throws IOException {
1062+
long start = System.nanoTime();
1063+
1064+
preloadSegment();
1065+
checkNumKeys();
1066+
1067+
if ( PARANOIA_MODE ) {
1068+
Map<Long, byte[]> originalCache = _dump._cache;
1069+
if ( originalCache != null ) { // otherwise things are uninitialized and lead to NPEs
1070+
_dump.setCache(new LRUCache<>((int)_numKeysInIndex * 2, 0.5f)); // temporarily increase cache
1071+
}
1072+
try (ResourceScope scope = ResourceScope.newConfinedScope()) {
1073+
MemorySegment dumpSegment = preloadDump(_dump, scope);
1074+
1075+
checkDumpElements();
1076+
checkIndexElements();
1077+
1078+
_log.info("{} releasing preload mapping of size {}", _lookupPath.getFileName(), dumpSegment.byteSize());
1079+
}
1080+
finally {
1081+
_dump.setCache(originalCache); // restore original configuration
1082+
}
1083+
}
1084+
1085+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1086+
if ( _consistent ) {
1087+
_log.info("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "consistent", duration);
1088+
} else {
1089+
_log.warn("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "inconsistent", duration);
1090+
}
1091+
1092+
return _consistent;
1093+
}
1094+
1095+
private void checkDumpElements() {
1096+
long start = System.nanoTime();
1097+
long numKeysInDump = 0;
1098+
1099+
long maxKey = _header.getMaxKey();
1100+
DumpIterator<E> iterator = _dump.iterator();
1101+
while ( iterator.hasNext() ) {
1102+
++numKeysInDump;
1103+
1104+
E element = iterator.next();
1105+
1106+
long elementKey = keyFor(element);
1107+
long elementIndex = keyOffsetApply(elementKey); // intentionally not bounds-checked
1108+
1109+
// not used during live operation, hence concurrency is not an issue
1110+
long posInIndex = getPosAt(_tableSegment, elementIndex);
1111+
long posInDump = iterator.getPosition();
1112+
1113+
if ( elementKey > maxKey ) {
1114+
_consistent = false;
1115+
_log.warn("{} Dump contains element with key {}, but upper bound for key is {}! Will rebuild index.", _lookupPath.getFileName(), elementKey,
1116+
maxKey);
1117+
} else if ( elementIndex >= _segmentCapacity ) {
1118+
_log.info(
1119+
"{} Dump contains element with key {}, but segment capacity is {}. Has probably been added to dump already, but not yet to index; fixing.",
1120+
_lookupPath.getFileName(), elementKey, _segmentCapacity);
1121+
_indexCorrections.rawContentCorrections().put(keyOffsetApply(elementKey), posOffsetApply(posInDump));
1122+
}
1123+
1124+
if ( posInIndex != posInDump ) {
1125+
_consistent = false;
1126+
_log.warn("{} Index claims element with key {} to be at position {}, but dump insists on {}! Will rebuild index.", _lookupPath.getFileName(),
1127+
elementKey, posInIndex, posInDump);
1128+
}
1129+
}
1130+
if ( _numKeysInIndex != numKeysInDump ) {
1131+
_log.warn("{} numKeys differ between index ({}) and dump ({}), fixing.", _lookupPath.getFileName(), _numKeysInIndex, numKeysInDump);
1132+
_indexCorrections.numKeys = numKeysInDump;
1133+
}
1134+
1135+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1136+
_log.info("{} was checked against dump iteration in {}", _lookupPath.getFileName(), duration);
1137+
}
1138+
1139+
private void checkIndexElements() {
1140+
long start = System.nanoTime();
1141+
for ( long arrayIndex = 0; arrayIndex < _segmentCapacity; ++arrayIndex ) {
1142+
// not used during live operation, hence concurrency is not an issue
1143+
long position = getPosAt(_tableSegment, arrayIndex);
1144+
if ( position >= 0 ) {
1145+
try {
1146+
E element = _dump.get(position);
1147+
1148+
if ( element == null ) {
1149+
_consistent = false;
1150+
_log.error("{} This is weird! Found position {} not to be deleted, but dump still returns null! Will rebuild index.",
1151+
_lookupPath.getFileName(), position);
1152+
} else {
1153+
long arrayKey = keyOffsetRevert(arrayIndex);
1154+
1155+
long elementKey = keyFor(element);
1156+
long elementIndex = keyOffsetApply(elementKey); // intentionally not bounds-checked
1157+
1158+
if ( elementIndex != arrayIndex ) {
1159+
_consistent = false;
1160+
_log.warn(
1161+
"{} Found position {} for key {} at index {}, but corresponding element from dump with key {} belongs at index {}! Will rebuild index.",
1162+
_lookupPath.getFileName(), position, arrayKey, arrayIndex, elementKey, elementIndex);
1163+
}
1164+
}
1165+
}
1166+
catch ( Exception argh ) {
1167+
_consistent = false;
1168+
_log.warn("{} Caught exception trying to get element at pos {} from dump! Will rebuild index.", _lookupPath.getFileName(), position, argh);
1169+
}
1170+
}
1171+
}
1172+
1173+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1174+
_log.info("{} was checked against dump lookups in {}", _lookupPath.getFileName(), duration);
1175+
}
1176+
1177+
private void checkNumKeys() {
1178+
long start = System.nanoTime();
1179+
long numKeysInIndex = 0;
1180+
1181+
long dumpSize = _dump.getDumpSize();
1182+
for ( long arrayIndex = 0; arrayIndex < _segmentCapacity; ++arrayIndex ) {
1183+
long position = getPosAt(_tableSegment, arrayIndex);
1184+
if ( position >= 0 ) {
1185+
++numKeysInIndex;
1186+
1187+
if ( position >= dumpSize ) {
1188+
_consistent = false;
1189+
_log.warn("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index.", _lookupPath.getFileName(), position,
1190+
dumpSize);
1191+
}
1192+
1193+
if ( _dump._deletedPositions.contains(position) ) {
1194+
_log.warn("{} Found deleted position {} at index {}. Has probably been deleted from dump already, but not yet from index; fixing.",
1195+
_lookupPath.getFileName(), position, arrayIndex);
1196+
_indexCorrections.rawContentCorrections().put(arrayIndex, 0);
1197+
}
1198+
}
1199+
}
1200+
if ( _header.getNumKeys() != numKeysInIndex ) {
1201+
_log.info("{} numKeys differ between caching header ({}) and bare count in actual table ({}), fixing.", _lookupPath.getFileName(),
1202+
_header.getNumKeys(), numKeysInIndex);
1203+
_indexCorrections.numKeys = numKeysInIndex;
1204+
}
1205+
1206+
_numKeysInIndex = numKeysInIndex;
1207+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1208+
_log.info("{} had its contents checked in {}", _lookupPath.getFileName(), duration);
1209+
}
1210+
1211+
private MemorySegment preloadDump( Dump<E> dump, ResourceScope scope ) throws IOException {
1212+
long start = System.nanoTime();
1213+
1214+
Path dumpPath = Paths.get(dump._dumpFile.getPath());
1215+
long dumpSize = dump.getDumpSize();
1216+
MemorySegment dumpSegment = MemorySegment.mapFile(dumpPath, 0, dumpSize, READ_ONLY, scope);
1217+
dumpSegment.load(); // force-fetch into memory
1218+
1219+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1220+
_log.info("{} was mapped and preloaded in {}", dumpPath.getFileName(), duration);
1221+
return dumpSegment;
1222+
}
1223+
1224+
private void preloadSegment() {
1225+
long start = System.nanoTime();
1226+
1227+
_tableSegment.load(); // force-fetch into memory
1228+
1229+
Duration duration = Duration.ofNanos(System.nanoTime() - start);
1230+
_log.info("{} was preloaded in {}", _lookupPath.getFileName(), duration);
1231+
}
1232+
}
10351233
}

0 commit comments

Comments
 (0)