Skip to content

Commit 9840aef

Browse files
committed
normalize scan depth based on the number of path components
fixes #4119
1 parent 7c25c9b commit 9840aef

File tree

6 files changed

+83
-46
lines changed

6 files changed

+83
-46
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2323
* Portions Copyright (c) 2020, Aleksandr Kirillov <[email protected]>.
2424
*/
@@ -226,9 +226,9 @@ public final class Configuration {
226226
* The directory hierarchy depth to limit the scanning for repositories.
227227
* E.g. if the /mercurial/ directory (relative to source root) is a repository
228228
* and /mercurial/usr/closed/ is sub-repository, the latter will be discovered
229-
* only if the depth is set to 2 or greater.
229+
* only if the depth is set to 3 or greater.
230230
*/
231-
public static final int defaultScanningDepth = 2;
231+
public static final int DEFAULT_SCANNING_DEPTH = 3;
232232

233233
/**
234234
* The name of the eftar file relative to the <var>DATA_ROOT</var>, which
@@ -576,7 +576,7 @@ public Configuration() {
576576
//setReviewPage("http://arc.myserver.org/caselog/PSARC/");
577577
setReviewPattern("\\b(\\d{4}/\\d{3})\\b"); // in form e.g. PSARC 2008/305
578578
setRevisionMessageCollapseThreshold(200);
579-
setScanningDepth(defaultScanningDepth); // default depth of scanning for repositories
579+
setScanningDepth(DEFAULT_SCANNING_DEPTH); // default depth of scanning for repositories
580580
setScopesEnabled(true);
581581
setSourceRoot(null);
582582
//setTabSize(4);

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,11 +804,18 @@ public void removeRepositories() {
804804
* Search through the directory for repositories and use the result to replace
805805
* the lists of repositories in both RuntimeEnvironment/Configuration and HistoryGuru.
806806
*
807-
* @param dir the directories to start the search in
807+
* @param dir paths of directories to start the search in
808808
*/
809809
public void setRepositories(String... dir) {
810+
int scanDepth = RuntimeEnvironment.getInstance().getScanningDepth();
811+
if (scanDepth == 0) {
812+
LOGGER.log(Level.INFO, "maximum scan depth is set to 0, will not scan for repositories");
813+
return;
814+
}
815+
810816
List<RepositoryInfo> repos = new ArrayList<>(HistoryGuru.getInstance().
811817
addRepositories(Arrays.stream(dir).map(File::new).toArray(File[]::new)));
818+
812819
setRepositories(repos);
813820
}
814821

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -734,16 +734,21 @@ public Map<String, Date> getLastModifiedTimes(File directory) throws CacheExcept
734734
}
735735

736736
/**
737-
* recursively search for repositories with a depth limit, add those found to the internally used map.
737+
* Recursively search for repositories with a depth limit, add those found to the internally used map.
738+
* @see #putRepository(Repository)
738739
*
739-
* @param files list of files to check if they contain a repository
740+
* @param files list of directories to check if they contain a repository
740741
* @param allowedNesting number of levels of nested repos to allow
741-
* @param depth current depth - using global scanningDepth - one can limit this to improve scanning performance
742+
* @param depth maximum scanning depth
742743
* @param isNested a value indicating if a parent {@link Repository} was already found above the {@code files}
743744
* @return collection of added repositories
744745
*/
745746
private Collection<RepositoryInfo> addRepositories(File[] files, int allowedNesting, int depth, boolean isNested) {
746747

748+
if (depth < 0) {
749+
throw new IllegalArgumentException("depth is negative");
750+
}
751+
747752
List<RepositoryInfo> repoList = new ArrayList<>();
748753
PathAccepter pathAccepter = env.getPathAccepter();
749754

@@ -772,27 +777,28 @@ private Collection<RepositoryInfo> addRepositories(File[] files, int allowedNest
772777
new Object[] {file, e.getMessage()});
773778
continue;
774779
}
780+
775781
if (repository == null) {
776-
if (depth > env.getScanningDepth()) {
777-
// we reached our search max depth, skip looking through the children
782+
if (depth == 0) {
783+
// Reached maximum depth, skip looking through the children.
778784
continue;
779785
}
786+
780787
// Not a repository, search its sub-dirs.
781788
if (pathAccepter.accept(file)) {
782789
File[] subFiles = file.listFiles();
783790
if (subFiles == null) {
784791
LOGGER.log(Level.WARNING,
785-
"Failed to get sub directories for ''{0}'', " +
786-
"check access permissions.",
792+
"Failed to get sub directories for ''{0}'', check access permissions.",
787793
file.getAbsolutePath());
788794
} else {
789795
// Recursive call to scan next depth
790796
repoList.addAll(addRepositories(subFiles,
791-
allowedNesting, depth + 1, isNested));
797+
allowedNesting, depth - 1, isNested));
792798
}
793799
}
794800
} else {
795-
LOGGER.log(Level.CONFIG, "Adding <{0}> repository: <{1}>",
801+
LOGGER.log(Level.CONFIG, "Adding <{0}> repository for ''{1}''",
796802
new Object[]{repository.getClass().getName(), path});
797803

798804
repoList.add(new RepositoryInfo(repository));
@@ -804,17 +810,15 @@ private Collection<RepositoryInfo> addRepositories(File[] files, int allowedNest
804810
LOGGER.log(Level.WARNING,
805811
"Failed to get sub directories for ''{0}'', check access permissions.",
806812
file.getAbsolutePath());
807-
} else if (depth <= env.getScanningDepth()) {
808-
// Search down to a limit -- if not: too much
809-
// stat'ing for huge Mercurial repositories
813+
} else if (depth > 0) {
810814
repoList.addAll(addRepositories(subFiles,
811-
allowedNesting - 1, depth + 1, true));
815+
allowedNesting - 1, depth - 1, true));
812816
}
813817
}
814818
}
815819
} catch (IOException exp) {
816820
LOGGER.log(Level.WARNING,
817-
"Failed to get canonical path for {0}: {1}",
821+
"Failed to get canonical path for ''{0}'': {1}",
818822
new Object[]{file.getAbsolutePath(), exp.getMessage()});
819823
LOGGER.log(Level.WARNING, "Repository will be ignored...", exp);
820824
}
@@ -824,21 +828,36 @@ private Collection<RepositoryInfo> addRepositories(File[] files, int allowedNest
824828
}
825829

826830
/**
827-
* Recursively search for repositories in given directories, add those found
828-
* to the internally used repository map.
831+
* Recursively search for repositories in given directories, add those found to the internally used map.
829832
*
830833
* @param files list of directories to check if they contain a repository
831834
* @return collection of added repositories
832835
*/
833836
public Collection<RepositoryInfo> addRepositories(File[] files) {
834837
ExecutorService executor = env.getIndexerParallelizer().getFixedExecutor();
835838
List<Future<Collection<RepositoryInfo>>> futures = new ArrayList<>();
839+
List<RepositoryInfo> repoList = new ArrayList<>();
840+
836841
for (File file: files) {
842+
/*
843+
* Adjust scan depth based on source root path. Some directories can be symbolic links pointing
844+
* outside source root so avoid constructing canonical paths for the computation to work.
845+
*/
846+
int levelsBelowSourceRoot;
847+
try {
848+
String relativePath = env.getPathRelativeToSourceRoot(file);
849+
levelsBelowSourceRoot = Path.of(relativePath).getNameCount();
850+
} catch (IOException | ForbiddenSymlinkException e) {
851+
LOGGER.log(Level.WARNING, "cannot get path relative to source root for ''{0}'', " +
852+
"skipping repository scan for this directory", file);
853+
continue;
854+
}
855+
final int scanDepth = env.getScanningDepth() - levelsBelowSourceRoot;
856+
837857
futures.add(executor.submit(() -> addRepositories(new File[]{file},
838-
env.getNestingMaximum(), 0, false)));
858+
env.getNestingMaximum(), scanDepth, false)));
839859
}
840860

841-
List<RepositoryInfo> repoList = new ArrayList<>();
842861
futures.forEach(future -> {
843862
try {
844863
repoList.addAll(future.get());
@@ -853,23 +872,21 @@ public Collection<RepositoryInfo> addRepositories(File[] files) {
853872
}
854873

855874
/**
856-
* Recursively search for repositories in given directories, add those found
857-
* to the internally used repository map.
875+
* Recursively search for repositories in given directories, add those found to the internally used map.
858876
*
859877
* @param repos collection of repository paths
860-
* @return collection of added repositories
878+
* @return collection of {@link RepositoryInfo} objects
861879
*/
862880
public Collection<RepositoryInfo> addRepositories(Collection<String> repos) {
863881
return addRepositories(repos.stream().map(File::new).toArray(File[]::new));
864882
}
865883

866884
/**
867885
* Get collection of repositories used internally by HistoryGuru.
868-
* @return collection of repositories
886+
* @return collection of {@link RepositoryInfo} objects
869887
*/
870888
public Collection<RepositoryInfo> getRepositories() {
871-
return repositories.values().stream().
872-
map(RepositoryInfo::new).collect(Collectors.toSet());
889+
return repositories.values().stream().map(RepositoryInfo::new).collect(Collectors.toSet());
873890
}
874891

875892
private void createHistoryCache(Repository repository, String sinceRevision) {

opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2011, Jens Elkner.
2323
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2424
*/
@@ -58,6 +58,7 @@
5858
import java.util.stream.Collectors;
5959

6060
import org.apache.commons.lang3.SystemUtils;
61+
import org.jetbrains.annotations.TestOnly;
6162
import org.opengrok.indexer.Info;
6263
import org.opengrok.indexer.Metrics;
6364
import org.opengrok.indexer.analysis.AnalyzerGuru;
@@ -86,12 +87,14 @@
8687
/**
8788
* Creates and updates an inverted source index as well as generates Xref, file
8889
* stats etc., if specified in the options.
89-
*
90+
* <p>
9091
* We shall use / as path delimiter in whole opengrok for uuids and paths
9192
* from Windows systems, the path shall be converted when entering the index or web
9293
* and converted back if needed* to access original file
93-
*
94-
* *Windows already supports opening /var/opengrok as C:\var\opengrok
94+
* </p>
95+
* <p>
96+
* Windows already supports opening {@code /var/opengrok} as {@code C:\var\opengrok}
97+
* </p>
9598
*/
9699
@SuppressWarnings({"PMD.AvoidPrintStackTrace", "PMD.SystemPrintln"})
97100
public final class Indexer {
@@ -361,6 +364,12 @@ public static void main(String[] argv) {
361364
// Create history cache first.
362365
if (searchRepositories) {
363366
if (searchPaths.isEmpty()) {
367+
/*
368+
* No search paths were specified. This means searching for the repositories under source root.
369+
* To speed the process up, gather the directories directly underneath source root.
370+
* The HistoryGuru#addRepositories(File[], int) will search for the repositories
371+
* in these directories in parallel.
372+
*/
364373
String[] dirs = env.getSourceRootFile().
365374
list((f, name) -> f.isDirectory() && env.getPathAccepter().accept(f));
366375
if (dirs != null) {
@@ -583,7 +592,7 @@ public static String[] parseOptions(String[] argv) throws ParseException {
583592

584593
parser.on("--depth", "=number", Integer.class,
585594
"Scanning depth for repositories in directory structure relative to",
586-
"source root. Default is " + Configuration.defaultScanningDepth + ".").execute(depth ->
595+
"source root. Default is " + Configuration.DEFAULT_SCANNING_DEPTH + ".").execute(depth ->
587596
cfg.setScanningDepth((Integer) depth));
588597

589598
parser.on("--disableRepository", "=type_name",
@@ -785,9 +794,9 @@ public static String[] parseOptions(String[] argv) throws ParseException {
785794
.execute(v -> handlePathParameter(repositories, ((String) v).trim()));
786795

787796
parser.on("-S", "--search", "=[path/to/repository|@file_with_paths]",
788-
"Search for source repositories under -s,--source, and add them. Path",
789-
"(relative to the source root) is optional. ",
790-
"File containing paths can be specified via @path syntax.",
797+
"Search for source repositories under source root (-s,--source),",
798+
"and add them. Path (relative to the source root) is optional. ",
799+
"File containing the paths can be specified via @path syntax.",
791800
"Option may be repeated.")
792801
.execute(v -> {
793802
searchRepositories = true;
@@ -979,6 +988,7 @@ public static void writeConfigToFile(RuntimeEnvironment env, String filename) th
979988
/**
980989
* Wrapper for prepareIndexer() that always generates history cache.
981990
*/
991+
@TestOnly
982992
public void prepareIndexer(RuntimeEnvironment env,
983993
boolean searchRepositories,
984994
boolean addProjects,
@@ -998,7 +1008,7 @@ public void prepareIndexer(RuntimeEnvironment env,
9981008
* history per directory).
9991009
* </p>
10001010
* @param env runtime environment
1001-
* @param searchPaths list of paths in which to search for repositories
1011+
* @param searchPaths list of paths relative to source root in which to search for repositories
10021012
* @param addProjects if true, add projects
10031013
* @param createHistoryCache create history cache flag
10041014
* @param subFiles list of directories

opengrok-indexer/src/test/java/org/opengrok/indexer/history/HistoryGuruTest.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2019, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.history;
@@ -352,20 +352,24 @@ void testScanningDepth() throws IOException {
352352
certainlyMkdirs(sub3);
353353

354354
int originalScanDepth = env.getScanningDepth();
355-
env.setScanningDepth(0);
356355

356+
// Test default scan depth value.
357357
HistoryGuru instance = HistoryGuru.getInstance();
358358
Collection<RepositoryInfo> addedRepos = instance.addRepositories(
359359
Collections.singleton(Paths.get(repository.getSourceRoot(), topLevelDirName).toString()));
360-
assertEquals(1, addedRepos.size(), "should add to max depth");
360+
assertEquals(2, addedRepos.size(),
361+
"2 repositories should be discovered with default scan depth setting");
361362

362-
env.setScanningDepth(1);
363+
// Reduction of scan depth should cause reduction of repositories.
363364
List<String> repoDirs = addedRepos.stream().map(RepositoryInfo::getDirectoryName).collect(Collectors.toList());
364365
instance.removeRepositories(repoDirs);
366+
env.setScanningDepth(originalScanDepth - 1);
365367
addedRepos = instance.addRepositories(
366368
Collections.singleton(Paths.get(repository.getSourceRoot(), topLevelDirName).toString()));
367-
assertEquals(2, addedRepos.size(), "should add to increased max depth");
369+
assertEquals(1, addedRepos.size(),
370+
"1 repository should be discovered with reduced scan depth");
368371

372+
// cleanup
369373
env.setScanningDepth(originalScanDepth);
370374
}
371375

opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/ProjectsController.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.web.api.v1.controller;
@@ -156,8 +156,7 @@ private List<RepositoryInfo> getRepositoriesInDir(final File projDir) {
156156
HistoryGuru histGuru = HistoryGuru.getInstance();
157157

158158
// There is no need to perform the work of invalidateRepositories(),
159-
// since addRepositories() calls getRepository() for each of
160-
// the repos.
159+
// since addRepositories() calls getRepository() for each of the repositories.
161160
return new ArrayList<>(histGuru.addRepositories(new File[]{projDir}));
162161
}
163162

0 commit comments

Comments
 (0)