17
17
package org .apache .kafka .storage .internals .log ;
18
18
19
19
import org .apache .kafka .common .utils .ByteBufferUnmapper ;
20
- import org .apache .kafka .common .utils .OperatingSystem ;
21
20
import org .apache .kafka .common .utils .Utils ;
21
+ import org .apache .kafka .server .util .LockUtils ;
22
22
23
23
import org .slf4j .Logger ;
24
24
import org .slf4j .LoggerFactory ;
33
33
import java .nio .file .Files ;
34
34
import java .util .Objects ;
35
35
import java .util .OptionalInt ;
36
- import java .util .concurrent .locks .Lock ;
37
36
import java .util .concurrent .locks .ReentrantLock ;
37
+ import java .util .concurrent .locks .ReentrantReadWriteLock ;
38
38
39
39
/**
40
40
* The abstract index class which holds entry format agnostic methods.
@@ -47,7 +47,18 @@ private enum SearchResultType {
47
47
48
48
private static final Logger log = LoggerFactory .getLogger (AbstractIndex .class );
49
49
50
- protected final ReentrantLock lock = new ReentrantLock ();
50
+ // Serializes all index operations that mutate internal state.
51
+ // Readers do not need to acquire this lock because:
52
+ // 1) MappedByteBuffer provides direct access to the OS-level buffer cache,
53
+ // which allows concurrent reads in practice.
54
+ // 2) Clients only read committed data and are not affected by concurrent appends/truncates.
55
+ // In the rare case when the data is truncated, the follower could read inconsistent data.
56
+ // The follower has logic to detect and ignore such inconsistent data via CRC and leader epoch checks.
57
+ // 3) Read and remap operations are coordinated via remapLock to ensure visibility of the
58
+ // underlying mmap.
59
+ private final ReentrantLock lock = new ReentrantLock ();
60
+ // Allows concurrent read operations while ensuring exclusive access whenever the underlying mmap is replaced or unmapped
61
+ private final ReentrantReadWriteLock remapLock = new ReentrantReadWriteLock ();
51
62
52
63
private final long baseOffset ;
53
64
private final int maxIndexSize ;
@@ -187,36 +198,32 @@ public void updateParentDir(File parentDir) {
187
198
* @return a boolean indicating whether the size of the memory map and the underneath file is changed or not.
188
199
*/
189
200
public boolean resize (int newSize ) throws IOException {
// NOTE(review): this span is a rendered diff. Lines starting with '-' are the removed
// implementation (explicit lock()/unlock()); lines starting with '+' are its replacement.
190
- lock .lock ();
191
- try {
192
- int roundedNewSize = roundDownToExactMultiple (newSize , entrySize ());
193
-
194
- if (length == roundedNewSize ) {
195
- log .debug ("Index {} was not resized because it already has size {}" , file .getAbsolutePath (), roundedNewSize );
196
- return false ;
197
- } else {
198
- RandomAccessFile raf = new RandomAccessFile (file , "rw" );
199
- try {
200
- int position = mmap .position ();
201
-
202
- /* Windows or z/OS won't let us modify the file length while the file is mmapped :-( */
203
- if (OperatingSystem .IS_WINDOWS || OperatingSystem .IS_ZOS )
204
- safeForceUnmap ();
205
- raf .setLength (roundedNewSize );
206
- this .length = roundedNewSize ;
207
- mmap = raf .getChannel ().map (FileChannel .MapMode .READ_WRITE , 0 , roundedNewSize );
208
- this .maxEntries = mmap .limit () / entrySize ();
209
- mmap .position (position );
210
- log .debug ("Resized {} to {}, position is {} and limit is {}" , file .getAbsolutePath (), roundedNewSize ,
211
- mmap .position (), mmap .limit ());
212
- return true ;
213
- } finally {
214
- Utils .closeQuietly (raf , "index file " + file .getName ());
215
- }
216
- }
217
- } finally {
218
- lock .unlock ();
219
- }
201
// Replacement: the body runs inside inLock(...) and, nested within it, inRemapWriteLock(...),
// i.e. `lock` is requested first and the remap write lock second.
+ return inLock (() ->
202
+ inRemapWriteLock (() -> {
203
// Only whole entries fit in the index, so the requested size is rounded down to a multiple of entrySize().
+ int roundedNewSize = roundDownToExactMultiple (newSize , entrySize ());
204
+
205
+ if (length == roundedNewSize ) {
206
+ log .debug ("Index {} was not resized because it already has size {}" , file .getAbsolutePath (), roundedNewSize );
207
+ return false ;
208
+ } else {
209
+ RandomAccessFile raf = new RandomAccessFile (file , "rw" );
210
+ try {
211
// Remember the current buffer position; it is re-applied to the new mapping below.
+ int position = mmap .position ();
212
+
213
// The unmap is now unconditional; the removed code above did it only when
// OperatingSystem.IS_WINDOWS or OperatingSystem.IS_ZOS was set.
+ safeForceUnmap ();
214
+ raf .setLength (roundedNewSize );
215
+ this .length = roundedNewSize ;
216
+ mmap = raf .getChannel ().map (FileChannel .MapMode .READ_WRITE , 0 , roundedNewSize );
217
// Capacity in entries is derived from the limit of the freshly created mapping.
+ this .maxEntries = mmap .limit () / entrySize ();
218
+ mmap .position (position );
219
+ log .debug ("Resized {} to {}, position is {} and limit is {}" , file .getAbsolutePath (), roundedNewSize ,
220
+ mmap .position (), mmap .limit ());
221
+ return true ;
222
+ } finally {
223
// The RandomAccessFile is closed on every path out of the try block, including exceptions.
+ Utils .closeQuietly (raf , "index file " + file .getName ());
224
+ }
225
+ }
226
+ }));
220
227
}
221
228
222
229
/**
@@ -236,12 +243,9 @@ public void renameTo(File f) throws IOException {
236
243
* Flush the data in the index to disk
237
244
*/
238
245
public void flush () {
// NOTE(review): rendered diff; '-' lines are the removed explicit lock()/unlock() form,
// '+' lines wrap the same mmap.force() call in inLock(...).
239
- lock .lock ();
240
- try {
246
+ inLock (() -> {
241
247
// NOTE(review): unlike trimToValidSize, there is no null check on mmap here; confirm
// whether flush can be invoked after the mapping has been force-unmapped.
mmap .force ();
242
- } finally {
243
- lock .unlock ();
244
- }
248
+ });
245
249
}
246
250
247
251
/**
@@ -261,12 +265,11 @@ public boolean deleteIfExists() throws IOException {
261
265
* the file.
262
266
*/
263
267
public void trimToValidSize () throws IOException {
// NOTE(review): rendered diff; '-' lines are the removed implementation, '+' lines its replacement.
264
- lock .lock ();
265
- try {
266
- resize (entrySize () * entries );
267
- } finally {
268
- lock .unlock ();
269
- }
268
+ inLock (() -> {
269
// New guard: the resize is skipped entirely when there is no current mapping.
+ if (mmap != null ) {
270
// Shrinks to exactly entrySize() * entries bytes. resize() wraps its own body in
// inLock(...) as well; `lock` is a ReentrantLock, so this nested request comes
// from the thread that is already inside inLock here.
+ resize (entrySize () * entries );
271
+ }
272
+ });
270
273
}
271
274
272
275
/**
@@ -286,12 +289,7 @@ public void closeHandler() {
286
289
// However, in some cases it can pause application threads(STW) for a long moment reading metadata from a physical disk.
287
290
// To prevent this, we forcefully cleanup memory mapping within proper execution which never affects API responsiveness.
288
291
// See https://issues.apache.org/jira/browse/KAFKA-4614 for the details.
289
- lock .lock ();
290
- try {
291
- safeForceUnmap ();
292
- } finally {
293
- lock .unlock ();
294
- }
292
+ inLock (() -> inRemapWriteLock (this ::safeForceUnmap ));
295
293
}
296
294
297
295
/**
@@ -418,20 +416,28 @@ protected void truncateToEntries0(int entries) {
418
416
mmap .position (entries * entrySize ());
419
417
}
420
418
421
- /**
422
- * Execute the given function in a lock only if we are running on windows or z/OS. We do this
423
- * because Windows or z/OS won't let us resize a file while it is mmapped. As a result we have to force unmap it
424
- * and this requires synchronizing reads.
425
- */
426
- protected final <T , E extends Exception > T maybeLock (Lock lock , StorageAction <T , E > action ) throws E {
427
- if (OperatingSystem .IS_WINDOWS || OperatingSystem .IS_ZOS )
428
- lock .lock ();
429
- try {
430
- return action .execute ();
431
- } finally {
432
- if (OperatingSystem .IS_WINDOWS || OperatingSystem .IS_ZOS )
433
- lock .unlock ();
434
- }
419
/**
 * Runs a value-returning action through {@code LockUtils.inLock} using this index's {@code lock}.
 * Presumably the lock is held for the duration of the action and released afterwards
 * (TODO confirm against LockUtils).
 *
 * @param action the work to run; may throw {@code E}
 * @return whatever {@code LockUtils.inLock} returns for the action
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <T , E extends Exception > T inLock (LockUtils .ThrowingSupplier <T , E > action ) throws E {
420
+ return LockUtils .inLock (lock , action );
421
+ }
422
+
423
/**
 * Overload of {@code inLock} for actions without a result: passes the action and this index's
 * {@code lock} to {@code LockUtils.inLock}.
 *
 * @param action the work to run; may throw {@code E}
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <E extends Exception > void inLock (LockUtils .ThrowingRunnable <E > action ) throws E {
424
+ LockUtils .inLock (lock , action );
425
+ }
426
+
427
/**
 * Runs a value-returning action through {@code LockUtils.inLock} using the read side of
 * {@code remapLock}. Several threads can hold the read side of a ReentrantReadWriteLock at once.
 *
 * @param action the work to run; may throw {@code E}
 * @return whatever {@code LockUtils.inLock} returns for the action
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <T , E extends Exception > T inRemapReadLock (LockUtils .ThrowingSupplier <T , E > action ) throws E {
428
+ return LockUtils .inLock (remapLock .readLock (), action );
429
+ }
430
+
431
/**
 * Overload of {@code inRemapReadLock} for actions without a result: passes the action and the
 * read side of {@code remapLock} to {@code LockUtils.inLock}.
 *
 * @param action the work to run; may throw {@code E}
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <E extends Exception > void inRemapReadLock (LockUtils .ThrowingRunnable <E > action ) throws E {
432
+ LockUtils .inLock (remapLock .readLock (), action );
433
+ }
434
+
435
/**
 * Runs a value-returning action through {@code LockUtils.inLock} using the write side of
 * {@code remapLock}. The write side of a ReentrantReadWriteLock is exclusive: it is not held
 * together with any other thread's read or write hold.
 *
 * @param action the work to run; may throw {@code E}
 * @return whatever {@code LockUtils.inLock} returns for the action
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <T , E extends Exception > T inRemapWriteLock (LockUtils .ThrowingSupplier <T , E > action ) throws E {
436
+ return LockUtils .inLock (remapLock .writeLock (), action );
437
+ }
438
+
439
/**
 * Overload of {@code inRemapWriteLock} for actions without a result: passes the action and the
 * write side of {@code remapLock} to {@code LockUtils.inLock}.
 *
 * @param action the work to run; may throw {@code E}
 * @throws E declared so the action's exception type reaches the caller
 */
+ protected final <E extends Exception > void inRemapWriteLock (LockUtils .ThrowingRunnable <E > action ) throws E {
440
+ LockUtils .inLock (remapLock .writeLock (), action );
435
441
}
436
442
437
443
/**
0 commit comments