Skip to content

Commit df98c2e

Browse files
authored
HIVE-29148: Replace IcebergSplit#blockLocations with Iceberg's utility (apache#6029)
1 parent 08944ea commit df98c2e

File tree

1 file changed

+2
-29
lines changed
  • iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce

1 file changed

+2
-29
lines changed

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java

Lines changed: 2 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -22,25 +22,17 @@
2222
import java.io.DataInput;
2323
import java.io.DataOutput;
2424
import java.io.IOException;
25-
import java.util.Arrays;
26-
import java.util.Set;
2725
import org.apache.hadoop.conf.Configuration;
28-
import org.apache.hadoop.fs.BlockLocation;
29-
import org.apache.hadoop.fs.FileSystem;
30-
import org.apache.hadoop.fs.Path;
3126
import org.apache.hadoop.mapreduce.InputSplit;
3227
import org.apache.iceberg.FileScanTask;
3328
import org.apache.iceberg.ScanTaskGroup;
29+
import org.apache.iceberg.hadoop.Util;
3430
import org.apache.iceberg.mr.InputFormatConfig;
35-
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
3631
import org.apache.iceberg.util.SerializationUtil;
37-
import org.slf4j.Logger;
38-
import org.slf4j.LoggerFactory;
3932

4033
// Since this class extends `mapreduce.InputSplit` and implements `mapred.InputSplit`, it can be returned by both MR v1
4134
// and v2 file formats.
4235
public class IcebergSplit extends InputSplit implements IcebergSplitContainer {
43-
private static final Logger LOG = LoggerFactory.getLogger(IcebergSplit.class);
4436

4537
public static final String[] ANYWHERE = new String[]{"*"};
4638

@@ -78,33 +70,14 @@ public String[] getLocations() {
7870
// getLocations() won't be accurate when called on worker nodes and will always return "*"
7971
if (locations == null && conf != null) {
8072
boolean localityPreferred = conf.getBoolean(InputFormatConfig.LOCALITY, false);
81-
locations = localityPreferred ? blockLocations(taskGroup, conf) : ANYWHERE;
73+
locations = localityPreferred ? Util.blockLocations(taskGroup, conf) : ANYWHERE;
8274
} else {
8375
locations = ANYWHERE;
8476
}
8577

8678
return locations;
8779
}
8880

89-
// We should move to Util.blockLocations once the following PR is merged and shipped
90-
// https://github.com/apache/iceberg/pull/11053
91-
private static String[] blockLocations(ScanTaskGroup<FileScanTask> task, Configuration conf) {
92-
final Set<String> locationSets = Sets.newHashSet();
93-
task.tasks().forEach(fileScanTask -> {
94-
final Path path = new Path(fileScanTask.file().path().toString());
95-
try {
96-
final FileSystem fs = path.getFileSystem(conf);
97-
for (BlockLocation location : fs.getFileBlockLocations(path, fileScanTask.start(), fileScanTask.length())) {
98-
locationSets.addAll(Arrays.asList(location.getHosts()));
99-
}
100-
} catch (IOException e) {
101-
LOG.warn("Failed to get block locations for path {}", path, e);
102-
}
103-
});
104-
105-
return locationSets.toArray(new String[0]);
106-
}
107-
10881
@Override
10982
public void write(DataOutput out) throws IOException {
11083
byte[] data = SerializationUtil.serializeToBytes(this.taskGroup);

0 commit comments

Comments
 (0)