Skip to content

Commit 84741ed

Browse files
authored
Add cache to globStatus in OFD (#427)
1 parent a88c055 commit 84741ed

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/Operations.java

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import java.util.HashMap;
1919
import java.util.List;
2020
import java.util.Map;
21+
import java.util.concurrent.ConcurrentHashMap;
2122
import java.util.concurrent.TimeUnit;
2223
import java.util.function.Predicate;
2324
import java.util.stream.Collectors;
@@ -104,6 +105,7 @@ public DeleteOrphanFiles.Result deleteOrphanFiles(
104105
if (olderThanTimestampMillis > 0) {
105106
operation = operation.olderThan(olderThanTimestampMillis);
106107
}
108+
Map<String, Boolean> dataManifestsCache = new ConcurrentHashMap<>();
107109
Path backupDirRoot = new Path(table.location(), backupDir);
108110
Path dataDirRoot = new Path(table.location(), "data");
109111
operation =
@@ -120,7 +122,7 @@ public DeleteOrphanFiles.Result deleteOrphanFiles(
120122
log.info("Skipped deleting backup file {}", file);
121123
} else if (file.contains(dataDirRoot.toString())
122124
&& backupEnabled
123-
&& isExistBackupDataManifests(table, file, backupDir)) {
125+
&& isExistBackupDataManifests(table, file, backupDir, dataManifestsCache)) {
124126
// move data files to backup dir if backup is enabled
125127
Path backupFilePath = getTrashPath(table, file, backupDir);
126128
log.info("Moving orphan file {} to {}", file, backupFilePath);
@@ -143,12 +145,18 @@ && isExistBackupDataManifests(table, file, backupDir)) {
143145
return operation.execute();
144146
}
145147

146-
private boolean isExistBackupDataManifests(Table table, String file, String backupDir) {
148+
private boolean isExistBackupDataManifests(
149+
Table table, String file, String backupDir, Map<String, Boolean> dataManifestsCache) {
147150
try {
148-
Path backupFilePath = getTrashPath(table, file, backupDir);
149-
Path pattern = new Path(backupFilePath.getParent(), "data_manifest*");
151+
Path backupPartition = getTrashPath(table, file, backupDir).getParent();
152+
if (dataManifestsCache.containsKey(backupPartition.toString())) {
153+
return dataManifestsCache.get(backupPartition.toString());
154+
}
155+
Path pattern = new Path(backupPartition, "data_manifest*");
150156
FileStatus[] matches = fs().globStatus(pattern);
151-
return matches != null && matches.length > 0;
157+
boolean isExist = matches != null && matches.length > 0;
158+
dataManifestsCache.put(backupPartition.toString(), isExist);
159+
return isExist;
152160
} catch (IOException e) {
153161
return false;
154162
}

0 commit comments

Comments
 (0)