Skip to content

Commit 74bb044

Browse files
authored
HADOOP-19645. [ABFS][ReadAheadV2] Improve Metrics for Read Calls to identify type of read done. (#7837)
Contributed by Anuj Modi
1 parent cf4f97d commit 74bb044

18 files changed

+547
-105
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ public class AbfsConfiguration{
138138
DefaultValue = DEFAULT_FOOTER_READ_BUFFER_SIZE)
139139
private int footerReadBufferSize;
140140

141+
@BooleanConfigurationValidatorAnnotation(
142+
ConfigurationKey = FS_AZURE_BUFFERED_PREAD_DISABLE,
143+
DefaultValue = DEFAULT_BUFFERED_PREAD_DISABLE)
144+
private boolean isBufferedPReadDisabled;
145+
141146
@BooleanConfigurationValidatorAnnotation(
142147
ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED,
143148
DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED)
@@ -953,6 +958,14 @@ public int getFooterReadBufferSize() {
953958
return this.footerReadBufferSize;
954959
}
955960

961+
/**
962+
* Returns whether the buffered pread is disabled.
963+
* @return true if buffered pread is disabled, false otherwise.
964+
*/
965+
public boolean isBufferedPReadDisabled() {
966+
return this.isBufferedPReadDisabled;
967+
}
968+
956969
public int getReadBufferSize() {
957970
return this.readBufferSize;
958971
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -945,8 +945,9 @@ contentLength, populateAbfsInputStreamContext(
945945
private AbfsInputStreamContext populateAbfsInputStreamContext(
946946
Optional<Configuration> options, ContextEncryptionAdapter contextEncryptionAdapter) {
947947
boolean bufferedPreadDisabled = options
948-
.map(c -> c.getBoolean(FS_AZURE_BUFFERED_PREAD_DISABLE, false))
949-
.orElse(false);
948+
.map(c -> c.getBoolean(FS_AZURE_BUFFERED_PREAD_DISABLE,
949+
getAbfsConfiguration().isBufferedPReadDisabled()))
950+
.orElse(getAbfsConfiguration().isBufferedPReadDisabled());
950951
int footerReadBufferSize = options.map(c -> c.getInt(
951952
AZURE_FOOTER_READ_BUFFER_SIZE, getAbfsConfiguration().getFooterReadBufferSize()))
952953
.orElse(getAbfsConfiguration().getFooterReadBufferSize());

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ public final class AbfsHttpConstants {
174174
public static final char CHAR_STAR = '*';
175175
public static final char CHAR_PLUS = '+';
176176

177+
public static final int SPLIT_NO_LIMIT = -1;
178+
177179
/**
178180
* Specifies the version of the REST protocol used for processing the request.
179181
* Versions should be added in enum list in ascending chronological order.

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public final class FileSystemConfigurations {
7575
public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false;
7676
public static final boolean DEFAULT_OPTIMIZE_FOOTER_READ = true;
7777
public static final int DEFAULT_FOOTER_READ_BUFFER_SIZE = 512 * ONE_KB;
78+
public static final boolean DEFAULT_BUFFERED_PREAD_DISABLE = false;
7879
public static final boolean DEFAULT_ALWAYS_READ_BUFFER_SIZE = false;
7980
public static final int DEFAULT_READ_AHEAD_BLOCK_SIZE = 4 * ONE_MB;
8081
public static final int DEFAULT_READ_AHEAD_RANGE = 64 * ONE_KB; // 64 KB
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Enumeration of the different types of read operations triggered by
 * AbfsInputStream. Each constant carries a two-letter abbreviation that is
 * emitted via {@link #toString()} for tracing/metrics, so the kind of read
 * performed can be identified from request telemetry.
 */
public enum ReadType {
  /**
   * Synchronous read from the storage service. No optimization is being applied.
   */
  DIRECT_READ("DR"),
  /**
   * Synchronous read from the storage service where optimizations were
   * considered but found disabled.
   */
  NORMAL_READ("NR"),
  /**
   * Asynchronous read from the storage service for filling up cache.
   */
  PREFETCH_READ("PR"),
  /**
   * Synchronous read from the storage service when nothing was found in cache.
   */
  MISSEDCACHE_READ("MR"),
  /**
   * Synchronous read from the storage service for reading the footer of a file.
   * Only triggered when footer read optimization kicks in.
   */
  FOOTER_READ("FR"),
  /**
   * Synchronous read from the storage service for reading a small file fully.
   * Only triggered when small file read optimization kicks in.
   */
  SMALLFILE_READ("SR"),
  /**
   * None of the above read types were applicable.
   */
  UNKNOWN_READ("UR");

  /** Two-letter abbreviation reported in tracing headers/metrics. */
  private final String readType;

  ReadType(String readType) {
    this.readType = readType;
  }

  /**
   * Get the read type as a string.
   *
   * @return the read type abbreviation
   */
  @Override
  public String toString() {
    return readType;
  }
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.commons.lang3.StringUtils;
2828
import org.apache.hadoop.classification.VisibleForTesting;
2929
import org.apache.hadoop.fs.PositionedReadable;
30+
import org.apache.hadoop.fs.azurebfs.constants.ReadType;
3031
import org.apache.hadoop.fs.impl.BackReference;
3132
import org.apache.hadoop.util.Preconditions;
3233

@@ -165,6 +166,7 @@ public AbfsInputStream(
165166
this.tracingContext = new TracingContext(tracingContext);
166167
this.tracingContext.setOperation(FSOperationType.READ);
167168
this.tracingContext.setStreamID(inputStreamId);
169+
this.tracingContext.setReadType(ReadType.UNKNOWN_READ);
168170
this.context = abfsInputStreamContext;
169171
readAheadBlockSize = abfsInputStreamContext.getReadAheadBlockSize();
170172
if (abfsReadFooterMetrics != null) {
@@ -227,7 +229,9 @@ public int read(long position, byte[] buffer, int offset, int length)
227229
if (streamStatistics != null) {
228230
streamStatistics.readOperationStarted();
229231
}
230-
int bytesRead = readRemote(position, buffer, offset, length, tracingContext);
232+
TracingContext tc = new TracingContext(tracingContext);
233+
tc.setReadType(ReadType.DIRECT_READ);
234+
int bytesRead = readRemote(position, buffer, offset, length, tc);
231235
if (statistics != null) {
232236
statistics.incrementBytesRead(bytesRead);
233237
}
@@ -345,6 +349,8 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO
345349
buffer = new byte[bufferSize];
346350
}
347351

352+
// Reset Read Type back to normal and set again based on code flow.
353+
tracingContext.setReadType(ReadType.NORMAL_READ);
348354
if (alwaysReadBufferSize) {
349355
bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
350356
} else {
@@ -385,6 +391,7 @@ private int readFileCompletely(final byte[] b, final int off, final int len)
385391
// data need to be copied to user buffer from index bCursor, bCursor has
386392
// to be the current fCusor
387393
bCursor = (int) fCursor;
394+
tracingContext.setReadType(ReadType.SMALLFILE_READ);
388395
return optimisedRead(b, off, len, 0, contentLength);
389396
}
390397

@@ -405,6 +412,7 @@ private int readLastBlock(final byte[] b, final int off, final int len)
405412
bCursor = (int) (fCursor - lastBlockStart);
406413
// 0 if contentlength is < buffersize
407414
long actualLenToRead = min(footerReadSize, contentLength);
415+
tracingContext.setReadType(ReadType.FOOTER_READ);
408416
return optimisedRead(b, off, len, lastBlockStart, actualLenToRead);
409417
}
410418

@@ -520,6 +528,7 @@ private int readInternal(final long position, final byte[] b, final int offset,
520528
LOG.debug("read ahead enabled issuing readheads num = {}", numReadAheads);
521529
TracingContext readAheadTracingContext = new TracingContext(tracingContext);
522530
readAheadTracingContext.setPrimaryRequestID();
531+
readAheadTracingContext.setReadType(ReadType.PREFETCH_READ);
523532
while (numReadAheads > 0 && nextOffset < contentLength) {
524533
LOG.debug("issuing read ahead requestedOffset = {} requested size {}",
525534
nextOffset, nextSize);
@@ -544,7 +553,9 @@ private int readInternal(final long position, final byte[] b, final int offset,
544553
}
545554

546555
// got nothing from read-ahead, do our own read now
547-
receivedBytes = readRemote(position, b, offset, length, new TracingContext(tracingContext));
556+
TracingContext tc = new TracingContext(tracingContext);
557+
tc.setReadType(ReadType.MISSEDCACHE_READ);
558+
receivedBytes = readRemote(position, b, offset, length, tc);
548559
return receivedBytes;
549560
} else {
550561
LOG.debug("read ahead disabled, reading remote");
@@ -578,6 +589,7 @@ int readRemote(long position, byte[] b, int offset, int length, TracingContext t
578589
streamStatistics.remoteReadOperation();
579590
}
580591
LOG.trace("Trigger client.read for path={} position={} offset={} length={}", path, position, offset, length);
592+
tracingContext.setPosition(String.valueOf(position));
581593
op = client.read(path, position, b, offset, length,
582594
tolerateOobAppends ? "*" : eTag, cachedSasToken.get(),
583595
contextEncryptionAdapter, tracingContext);

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobDeleteHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ public boolean execute() throws AzureBlobFileSystemException {
126126
*/
127127
deleted = recursive ? safeDelete(path) : deleteInternal(path);
128128
} finally {
129-
tracingContext.setOperatedBlobCount(null);
129+
tracingContext.setOperatedBlobCount(0);
130130
}
131131
if (deleteCount.get() == 0) {
132132
/*

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobRenameHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ private boolean finalSrcRename() throws AzureBlobFileSystemException {
204204
}
205205
throw e;
206206
} finally {
207-
tracingContext.setOperatedBlobCount(null);
207+
tracingContext.setOperatedBlobCount(0);
208208
}
209209
}
210210

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.hadoop.fs.azurebfs.utils;
2020

2121
import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
22+
import org.apache.hadoop.fs.azurebfs.constants.ReadType;
2223

2324
/**
2425
* Interface for testing identifiers tracked via TracingContext
@@ -32,4 +33,5 @@ public interface Listener {
3233
void setOperation(FSOperationType operation);
3334
void updateIngressHandler(String ingressHandler);
3435
void updatePosition(String position);
36+
void updateReadType(ReadType readType);
3537
}

0 commit comments

Comments
 (0)