2828
2929import gnu .trove .list .TLongList ;
3030import gnu .trove .list .array .TLongArrayList ;
31+ import gnu .trove .map .TLongLongMap ;
32+ import gnu .trove .map .hash .TLongLongHashMap ;
3133import jdk .incubator .foreign .GroupLayout ;
3234import jdk .incubator .foreign .MemoryLayout ;
3335import jdk .incubator .foreign .MemoryLayout .PathElement ;
3436import jdk .incubator .foreign .MemorySegment ;
3537import jdk .incubator .foreign .ResourceScope ;
38+ import util .dump .cache .LRUCache ;
3639import util .dump .reflection .FieldAccessor ;
3740import util .dump .reflection .FieldFieldAccessor ;
3841import util .dump .reflection .Reflection ;
@@ -50,6 +53,8 @@ public abstract class MmapLongIdIndex<E> extends DumpIndex<E> implements UniqueC
5053
5154 private static final Logger _log = LoggerFactory .getLogger (MmapLongIdIndex .class );
5255
56+ private static final boolean PARANOIA_MODE = true ;
57+
5358 private static final VarHandle LONG_ARRAY_ACCESS = sequenceLayout (JAVA_LONG ).varHandle (long .class , sequenceElement ());
5459
5560 public static <E > MmapLongIdIndex <E > forClosedRange ( Dump <E > dump , String fieldName , long minKey , long maxKey ) throws NoSuchFieldException {
@@ -95,7 +100,7 @@ private static void longArraySetVolatile( MemorySegment array, long index, long
95100 protected final long _minKey ;
96101 protected final long _maxKey ;
97102
98- private final HeaderCorrections _headerCorrections = new HeaderCorrections ();
103+ private final IndexCorrections _indexCorrections = new IndexCorrections ();
99104
100105 protected FileLayout _fileLayout ;
101106 private Header _header ;
@@ -277,9 +282,13 @@ protected boolean checkMeta() {
277282 }
278283
279284 protected boolean checkNumKeys ( Header header ) {
280- if ( header .getNumKeys () <= 0 ) {
285+ if ( header .getNumKeys () < 0 ) {
286+ return false ;
287+ }
288+ if ( header .getNumKeys () == 0 && _dump .getDumpSize () > 0 ) {
281289 return false ;
282290 }
291+
283292 return header .getNumKeys () <= header .getTableBytes () / Long .BYTES ;
284293 }
285294
@@ -399,46 +408,28 @@ private boolean add0( E elem, long pos ) {
399408 }
400409
   /** Applies header fixes collected during the consistency check — currently only the cached numKeys count. */
   private void applyHeaderCorrections() {
      // null means the consistency check found the header's numKeys to be accurate
      if ( _indexCorrections.numKeys != null ) {
         _log.info("{} fixing header...", _lookupPath.getFileName());
         _header.setNumKeys(_indexCorrections.numKeys);
      }
   }
406416
   /**
    * Applies raw table fixes collected during the consistency check by writing the recorded values
    * verbatim to their array indexes (no key or pos offset translation happens here).
    * Afterwards the cached numKeys in the header is cross-checked against the actual table contents.
    */
   private void applyTableCorrections() {
      // null means the consistency check queued no table corrections
      if ( _indexCorrections.rawContentCorrections != null ) {
         _log.info("{} fixing table...", _lookupPath.getFileName());
         _indexCorrections.rawContentCorrections.forEachEntry(( index, posInfo ) -> {
            longArraySet(_tableSegment, index, posInfo);
            return true; // keep iterating over all recorded corrections
         });

         // post-fixup sanity check: the cached key count must now match the table contents
         // NOTE(review): getAllLongKeys().length is an int — assumes the key count fits into an array; confirm for very large indexes
         if ( _header.getNumKeys() != getAllLongKeys().length ) {
            throw new IllegalStateException(_lookupPath.getFileName() + " inconsistencies post-fixup, wtf");
         }
      }
   }
429430
   /**
    * Delegates the (potentially expensive) verification of index-vs-dump consistency to a dedicated
    * {@link ConsistencyCheck} instance, which records repairable deviations in {@code _indexCorrections}.
    *
    * @return true if the index is usable (possibly after applying corrections), false if it must be rebuilt
    */
   private boolean checkConsistency( Header header, MemorySegment tableSegment ) throws IOException {
      return new ConsistencyCheck(header, tableSegment).perform();
   }
443434
444435 private boolean checkHeader () {
@@ -500,7 +491,7 @@ private boolean checkHeader() {
500491 }
501492
502493 // not closed properly, or inconsistency between dump file and header state
503- if ( header .getOpenedTimestamp () >= header .getClosedTimestamp () || header . getNumKeys () == 0 && _dump . getDumpSize () > 0 ) {
494+ if ( header .getOpenedTimestamp () >= header .getClosedTimestamp () ) {
504495 _log .info ("{} was not closed properly, checking consistency..." , _lookupPath .getFileName ());
505496 checkRequired = true ;
506497 }
@@ -511,6 +502,11 @@ private boolean checkHeader() {
511502 checkRequired = true ;
512503 }
513504
505+ if ( PARANOIA_MODE && !checkRequired ) {
506+ _log .info ("{} hardcoded paranoia mode enabled, checking consistency..." , _lookupPath .getFileName ());
507+ checkRequired = true ;
508+ }
509+
514510 if ( checkRequired ) {
515511 MemorySegment tableSegment = MemorySegment .mapFile (_lookupPath , header .getTableOffset (), header .getTableBytes (), READ_ONLY , autoClosedScope );
516512 if ( !checkConsistency (header , tableSegment ) ) {
@@ -522,8 +518,8 @@ private boolean checkHeader() {
522518
523519 return true ;
524520 }
525- catch ( Exception e ) {
526- throw new RuntimeException (e );
521+ catch ( Exception argh ) {
522+ throw new RuntimeException (argh );
527523 }
528524
529525 }
@@ -647,11 +643,14 @@ private void openExisting() throws IOException {
647643 openHeader (mapHeaderSegment ());
648644
649645 mapTableSegment ();
646+
647+ applyTableCorrections ();
650648 }
651649
   /** Wraps the mapped header segment, stamps the open time and applies any pending header corrections. */
   private void openHeader( MemorySegment headerSegment ) {
      _header = new Header(_fileLayout, headerSegment);
      _header.setOpenedTimestamp(System.currentTimeMillis());

      applyHeaderCorrections(); // e.g. numKeys fixes collected during a preceding consistency check
   }
657656
@@ -941,7 +940,8 @@ static final class OpenRangeMmapLongIdIndex<E> extends MmapLongIdIndex<E> {
941940
942941 private static long deriveMaxKeyFrom ( long minKey ) {
943942 // this basically rotates the zero-index of the array, just like simple index addition/subtraction does during offset compensation
944- return Long .MAX_VALUE + minKey ;
943+ // return Long.MAX_VALUE + minKey; // this totally screws up the < comparison
944+ return minKey < 0 ? Long .MAX_VALUE + minKey : Long .MAX_VALUE ;
945945 }
946946
947947 private final Object _growLock = new Object ();
@@ -983,10 +983,18 @@ private void ensureTableContains( long index ) {
983983 }
984984
985985
   /**
    * Collects repairable deviations discovered during a consistency check so they can be applied
    * once the index files are (re)opened, via applyHeaderCorrections()/applyTableCorrections().
    */
   private static final class IndexCorrections {

      // corrected key count for the header; null means no correction needed
      Long numKeys;

      TLongLongMap rawContentCorrections; // no key or pos offsets here, just write values to array indexes verbatim

      // lazily initialized so that a null field doubles as the "no table corrections" marker
      TLongLongMap rawContentCorrections() {
         if ( rawContentCorrections == null ) {
            rawContentCorrections = new TLongLongHashMap();
         }
         return rawContentCorrections;
      }
   }
991999
9921000
@@ -1032,4 +1040,194 @@ public long getLayoutVersion() {
10321040 }
10331041
10341042 }
1043+
1044+
1045+ private final class ConsistencyCheck {
1046+
1047+ private final Header _header ;
1048+ private final MemorySegment _tableSegment ;
1049+ private final long _segmentCapacity ;
1050+
1051+ private boolean _consistent = true ;
1052+
1053+ private long _numKeysInIndex ;
1054+
1055+ public ConsistencyCheck ( Header header , MemorySegment tableSegment ) {
1056+ _header = header ;
1057+ _tableSegment = tableSegment ;
1058+ _segmentCapacity = capacity (_tableSegment );
1059+ }
1060+
1061+ public boolean perform () throws IOException {
1062+ long start = System .nanoTime ();
1063+
1064+ preloadSegment ();
1065+ checkNumKeys ();
1066+
1067+ if ( PARANOIA_MODE ) {
1068+ Map <Long , byte []> originalCache = _dump ._cache ;
1069+ if ( originalCache != null ) { // otherwise things are uninitialized and lead to NPEs
1070+ _dump .setCache (new LRUCache <>((int )_numKeysInIndex * 2 , 0.5f )); // temporarily increase cache
1071+ }
1072+ try (ResourceScope scope = ResourceScope .newConfinedScope ()) {
1073+ MemorySegment dumpSegment = preloadDump (_dump , scope );
1074+
1075+ checkDumpElements ();
1076+ checkIndexElements ();
1077+
1078+ _log .info ("{} releasing preload mapping of size {}" , _lookupPath .getFileName (), dumpSegment .byteSize ());
1079+ }
1080+ finally {
1081+ _dump .setCache (originalCache ); // restore original configuration
1082+ }
1083+ }
1084+
1085+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1086+ if ( _consistent ) {
1087+ _log .info ("{} was checked and found to be {} in {}" , _lookupPath .getFileName (), "consistent" , duration );
1088+ } else {
1089+ _log .warn ("{} was checked and found to be {} in {}" , _lookupPath .getFileName (), "inconsistent" , duration );
1090+ }
1091+
1092+ return _consistent ;
1093+ }
1094+
1095+ private void checkDumpElements () {
1096+ long start = System .nanoTime ();
1097+ long numKeysInDump = 0 ;
1098+
1099+ long maxKey = _header .getMaxKey ();
1100+ DumpIterator <E > iterator = _dump .iterator ();
1101+ while ( iterator .hasNext () ) {
1102+ ++numKeysInDump ;
1103+
1104+ E element = iterator .next ();
1105+
1106+ long elementKey = keyFor (element );
1107+ long elementIndex = keyOffsetApply (elementKey ); // intentionally not bounds-checked
1108+
1109+ // not used during live operation, hence concurrency is not an issue
1110+ long posInIndex = getPosAt (_tableSegment , elementIndex );
1111+ long posInDump = iterator .getPosition ();
1112+
1113+ if ( elementKey > maxKey ) {
1114+ _consistent = false ;
1115+ _log .warn ("{} Dump contains element with key {}, but upper bound for key is {}! Will rebuild index." , _lookupPath .getFileName (), elementKey ,
1116+ maxKey );
1117+ } else if ( elementIndex >= _segmentCapacity ) {
1118+ _log .info (
1119+ "{} Dump contains element with key {}, but segment capacity is {}. Has probably been added to dump already, but not yet to index; fixing." ,
1120+ _lookupPath .getFileName (), elementKey , _segmentCapacity );
1121+ _indexCorrections .rawContentCorrections ().put (keyOffsetApply (elementKey ), posOffsetApply (posInDump ));
1122+ }
1123+
1124+ if ( posInIndex != posInDump ) {
1125+ _consistent = false ;
1126+ _log .warn ("{} Index claims element with key {} to be at position {}, but dump insists on {}! Will rebuild index." , _lookupPath .getFileName (),
1127+ elementKey , posInIndex , posInDump );
1128+ }
1129+ }
1130+ if ( _numKeysInIndex != numKeysInDump ) {
1131+ _log .warn ("{} numKeys differ between index ({}) and dump ({}), fixing." , _lookupPath .getFileName (), _numKeysInIndex , numKeysInDump );
1132+ _indexCorrections .numKeys = numKeysInDump ;
1133+ }
1134+
1135+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1136+ _log .info ("{} was checked against dump iteration in {}" , _lookupPath .getFileName (), duration );
1137+ }
1138+
1139+ private void checkIndexElements () {
1140+ long start = System .nanoTime ();
1141+ for ( long arrayIndex = 0 ; arrayIndex < _segmentCapacity ; ++arrayIndex ) {
1142+ // not used during live operation, hence concurrency is not an issue
1143+ long position = getPosAt (_tableSegment , arrayIndex );
1144+ if ( position >= 0 ) {
1145+ try {
1146+ E element = _dump .get (position );
1147+
1148+ if ( element == null ) {
1149+ _consistent = false ;
1150+ _log .error ("{} This is weird! Found position {} not to be deleted, but dump still returns null! Will rebuild index." ,
1151+ _lookupPath .getFileName (), position );
1152+ } else {
1153+ long arrayKey = keyOffsetRevert (arrayIndex );
1154+
1155+ long elementKey = keyFor (element );
1156+ long elementIndex = keyOffsetApply (elementKey ); // intentionally not bounds-checked
1157+
1158+ if ( elementIndex != arrayIndex ) {
1159+ _consistent = false ;
1160+ _log .warn (
1161+ "{} Found position {} for key {} at index {}, but corresponding element from dump with key {} belongs at index {}! Will rebuild index." ,
1162+ _lookupPath .getFileName (), position , arrayKey , arrayIndex , elementKey , elementIndex );
1163+ }
1164+ }
1165+ }
1166+ catch ( Exception argh ) {
1167+ _consistent = false ;
1168+ _log .warn ("{} Caught exception trying to get element at pos {} from dump! Will rebuild index." , _lookupPath .getFileName (), position , argh );
1169+ }
1170+ }
1171+ }
1172+
1173+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1174+ _log .info ("{} was checked against dump lookups in {}" , _lookupPath .getFileName (), duration );
1175+ }
1176+
1177+ private void checkNumKeys () {
1178+ long start = System .nanoTime ();
1179+ long numKeysInIndex = 0 ;
1180+
1181+ long dumpSize = _dump .getDumpSize ();
1182+ for ( long arrayIndex = 0 ; arrayIndex < _segmentCapacity ; ++arrayIndex ) {
1183+ long position = getPosAt (_tableSegment , arrayIndex );
1184+ if ( position >= 0 ) {
1185+ ++numKeysInIndex ;
1186+
1187+ if ( position >= dumpSize ) {
1188+ _consistent = false ;
1189+ _log .warn ("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index." , _lookupPath .getFileName (), position ,
1190+ dumpSize );
1191+ }
1192+
1193+ if ( _dump ._deletedPositions .contains (position ) ) {
1194+ _log .warn ("{} Found deleted position {} at index {}. Has probably been deleted from dump already, but not yet from index; fixing." ,
1195+ _lookupPath .getFileName (), position , arrayIndex );
1196+ _indexCorrections .rawContentCorrections ().put (arrayIndex , 0 );
1197+ }
1198+ }
1199+ }
1200+ if ( _header .getNumKeys () != numKeysInIndex ) {
1201+ _log .info ("{} numKeys differ between caching header ({}) and bare count in actual table ({}), fixing." , _lookupPath .getFileName (),
1202+ _header .getNumKeys (), numKeysInIndex );
1203+ _indexCorrections .numKeys = numKeysInIndex ;
1204+ }
1205+
1206+ _numKeysInIndex = numKeysInIndex ;
1207+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1208+ _log .info ("{} had its contents checked in {}" , _lookupPath .getFileName (), duration );
1209+ }
1210+
1211+ private MemorySegment preloadDump ( Dump <E > dump , ResourceScope scope ) throws IOException {
1212+ long start = System .nanoTime ();
1213+
1214+ Path dumpPath = Paths .get (dump ._dumpFile .getPath ());
1215+ long dumpSize = dump .getDumpSize ();
1216+ MemorySegment dumpSegment = MemorySegment .mapFile (dumpPath , 0 , dumpSize , READ_ONLY , scope );
1217+ dumpSegment .load (); // force-fetch into memory
1218+
1219+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1220+ _log .info ("{} was mapped and preloaded in {}" , dumpPath .getFileName (), duration );
1221+ return dumpSegment ;
1222+ }
1223+
1224+ private void preloadSegment () {
1225+ long start = System .nanoTime ();
1226+
1227+ _tableSegment .load (); // force-fetch into memory
1228+
1229+ Duration duration = Duration .ofNanos (System .nanoTime () - start );
1230+ _log .info ("{} was preloaded in {}" , _lookupPath .getFileName (), duration );
1231+ }
1232+ }
10351233}
0 commit comments