Skip to content

Commit 6d9d3df

Browse files
azakkermanVladimir Kotal
authored andcommitted
Fix for #3306: Speed up HistoryGuru.getRepository(File) call for Opengrok installations with many Repositories
and deep file trees by caching previous lookup results for dir -> Repository. Additional refactorings to make this new functionality testable against legacy implementation: - Introduce Jimfs for testing which will allow testing Windows, MacOS or UNIX filesystem on any machine - Refactor PathUtils to use Paths instead of Files (this is needed to use Jimfs)
1 parent 94ba42f commit 6d9d3df

File tree

15 files changed

+846
-113
lines changed

15 files changed

+846
-113
lines changed

opengrok-indexer/pom.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ Portions Copyright (c) 2020-2020, Lubos Kosco <[email protected]>.
137137
<artifactId>junit-vintage-engine</artifactId>
138138
<scope>test</scope>
139139
</dependency>
140+
<dependency>
141+
<groupId>org.hamcrest</groupId>
142+
<artifactId>hamcrest-library</artifactId>
143+
<version>${hamcrest.version}</version>
144+
<scope>test</scope>
145+
</dependency>
140146
<dependency> <!-- TODO: remove! (moving Messages to web module) -->
141147
<groupId>org.awaitility</groupId>
142148
<artifactId>awaitility</artifactId>
@@ -155,6 +161,12 @@ Portions Copyright (c) 2020-2020, Lubos Kosco <[email protected]>.
155161
<version>2.28.2</version>
156162
<scope>test</scope>
157163
</dependency>
164+
<dependency>
165+
<groupId>com.google.jimfs</groupId>
166+
<artifactId>jimfs</artifactId>
167+
<version>1.1</version>
168+
<scope>test</scope>
169+
</dependency>
158170
<dependency>
159171
<groupId>com.cronutils</groupId>
160172
<artifactId>cron-utils</artifactId>

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.io.FileNotFoundException;
3030
import java.io.IOException;
3131
import java.nio.file.Path;
32+
import java.nio.file.Paths;
3233
import java.util.ArrayList;
3334
import java.util.Arrays;
3435
import java.util.Collections;
@@ -407,8 +408,8 @@ public String getPathRelativeToSourceRoot(File file)
407408
throw new FileNotFoundException("sourceRoot is not defined");
408409
}
409410

410-
String maybeRelPath = PathUtils.getRelativeToCanonical(file.getPath(),
411-
sourceRoot, getAllowedSymlinks(), getCanonicalRoots());
411+
String maybeRelPath = PathUtils.getRelativeToCanonical(file.toPath(),
412+
Paths.get(sourceRoot), getAllowedSymlinks(), getCanonicalRoots());
412413
File maybeRelFile = new File(maybeRelPath);
413414
if (!maybeRelFile.isAbsolute()) {
414415
/*

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java

Lines changed: 20 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@
2828
import java.io.InputStream;
2929
import java.lang.reflect.InvocationTargetException;
3030
import java.nio.file.Path;
31-
import java.nio.file.Paths;
3231
import java.util.ArrayList;
3332
import java.util.Collection;
3433
import java.util.Collections;
3534
import java.util.Date;
3635
import java.util.HashMap;
3736
import java.util.List;
3837
import java.util.Map;
38+
import java.util.Objects;
3939
import java.util.Set;
4040
import java.util.concurrent.ConcurrentHashMap;
4141
import java.util.concurrent.CountDownLatch;
@@ -89,6 +89,11 @@ public final class HistoryGuru {
8989
*/
9090
private final Map<String, String> repositoryRoots = new ConcurrentHashMap<>();
9191

92+
/**
93+
* Interface to perform repository lookup for a given file path and HistoryGuru state.
94+
*/
95+
private final RepositoryLookup repositoryLookup;
96+
9297
/**
9398
* Creates a new instance of HistoryGuru, and try to set the default source
9499
* control system.
@@ -110,6 +115,7 @@ private HistoryGuru() {
110115
}
111116
}
112117
historyCache = cache;
118+
repositoryLookup = RepositoryLookup.cached();
113119
}
114120

115121
/**
@@ -733,38 +739,8 @@ private List<Repository> getReposFromString(Collection<String> repositories) {
733739
return repos;
734740
}
735741

736-
protected Repository getRepository(File path) {
737-
File file = path;
738-
Set<String> rootKeys = repositoryRoots.keySet();
739-
740-
while (file != null) {
741-
String nextPath = file.getPath();
742-
for (String rootKey : rootKeys) {
743-
String rel;
744-
try {
745-
rel = PathUtils.getRelativeToCanonical(nextPath, rootKey);
746-
} catch (IOException e) {
747-
LOGGER.log(Level.WARNING,
748-
"Failed to get relative to canonical for " + nextPath,
749-
e);
750-
return null;
751-
}
752-
Repository repo;
753-
if (rel.equals(nextPath)) {
754-
repo = repositories.get(nextPath);
755-
} else {
756-
String inRootPath = Paths.get(rootKey, rel).toString();
757-
repo = repositories.get(inRootPath);
758-
}
759-
if (repo != null) {
760-
return repo;
761-
}
762-
}
763-
764-
file = file.getParentFile();
765-
}
766-
767-
return null;
742+
protected Repository getRepository(File file) {
743+
return repositoryLookup.getRepository(file.toPath(), repositoryRoots.keySet(), repositories, PathUtils::getRelativeToCanonical);
768744
}
769745

770746
/**
@@ -774,10 +750,9 @@ protected Repository getRepository(File path) {
774750
* @param repos repository paths
775751
*/
776752
public void removeRepositories(Collection<String> repos) {
777-
for (String repo : repos) {
778-
repositories.remove(repo);
779-
}
780-
753+
Set<Repository> removedRepos = repos.stream().map(repositories::remove)
754+
.filter(Objects::nonNull).collect(Collectors.toSet());
755+
repositoryLookup.repositoriesRemoved(removedRepos);
781756
// Re-map the repository roots.
782757
repositoryRoots.clear();
783758
List<Repository> ccopy = new ArrayList<>(repositories.values());
@@ -825,8 +800,7 @@ public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, L
825800
*/
826801
public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType) {
827802
if (repos == null || repos.isEmpty()) {
828-
repositoryRoots.clear();
829-
repositories.clear();
803+
clear();
830804
return;
831805
}
832806

@@ -885,14 +859,19 @@ public void run() {
885859
}
886860
executor.shutdown();
887861

888-
repositoryRoots.clear();
889-
repositories.clear();
862+
clear();
890863
newrepos.forEach((_key, repo) -> putRepository(repo));
891864

892865
elapsed.report(LOGGER, String.format("done invalidating %d repositories", newrepos.size()),
893866
"history.repositories.invalidate");
894867
}
895868

869+
private void clear() {
870+
repositoryRoots.clear();
871+
repositories.clear();
872+
repositoryLookup.clear();
873+
}
874+
896875
/**
897876
* Adds the specified {@code repository} to this instance's repository map
898877
* and repository-root map (if not already there).

opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ public void setDirectoryName(File dir) {
169169
String path;
170170
String originalPath = dir.getPath();
171171
try {
172-
path = PathUtils.getRelativeToCanonical(originalPath, rootPath);
172+
path = PathUtils.getRelativeToCanonical(dir.toPath(), Paths.get(rootPath));
173173
// OpenGrok has a weird convention that directoryNameRelative must start with a path separator,
174174
// as it is elsewhere directly appended to env.getSourceRootPath() and also stored as such.
175175
if (!path.equals(originalPath)) {
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2020, Anatoly Akkerman <[email protected]>, <[email protected]>.
22+
*/
23+
package org.opengrok.indexer.history;
24+
25+
import java.io.IOException;
26+
import java.nio.file.Path;
27+
import java.util.Collection;
28+
import java.util.Map;
29+
import java.util.Set;
30+
31+
/**
32+
* Interface for finding enclosing Repository for a given Path, used by HistoryGuru.
33+
* <p>
34+
* Two implementations exists
35+
* - uncached, (legacy behavior) extracted from HistoryGuru into a stand-alone impl
36+
* - cached, new implementation, which reduces number of expensive canonicalization calls
37+
* <p>
38+
* We preserve both cached and uncached implementations in order to verify correctness of the cached impl.
39+
*/
40+
public interface RepositoryLookup {
41+
42+
static RepositoryLookupCached cached() {
43+
return new RepositoryLookupCached();
44+
}
45+
46+
static RepositoryLookupUncached uncached() {
47+
return new RepositoryLookupUncached();
48+
}
49+
50+
/**
51+
* This interface allows intercepting PathUtils.getRelativeToCanonical in order to measure the impact of caching.
52+
*
53+
* In practice, PathUtils::getRelativeToCanonical is the implementation of this interface
54+
*/
55+
@FunctionalInterface
56+
interface PathCanonicalizer {
57+
String resolve(Path path, Path relativeTo) throws IOException;
58+
}
59+
60+
/**
61+
* Find enclosing repository for a given path.
62+
*
63+
* @param path path to find encolsing repository for
64+
* @param repoParentDirs Set of repository parent dirs (parents of repository roots)
65+
* @param repositories Map of repository root to Repository
66+
* @param canonicalizer PathCanonicalizer reference
67+
* @return enclosing Repository or null if not found
68+
*/
69+
Repository getRepository(Path path, Set<String> repoParentDirs, Map<String, Repository> repositories,
70+
PathCanonicalizer canonicalizer);
71+
72+
/**
73+
* Lifecycle method to clear internal cache (if any) when repositories are invalidated.
74+
*/
75+
void clear();
76+
77+
/**
78+
* Lifecycle method to invalidate any cache entries that point to given repositories that are being removed.
79+
*
80+
* @param removedRepos
81+
*/
82+
void repositoriesRemoved(Collection<Repository> removedRepos);
83+
}

0 commit comments

Comments
 (0)