Skip to content

Commit 059d25e

Browse files
authored
Merge pull request #3951 from vladak/truly_incremental_reindex
history based reindex
2 parents 607bcff + 855e7d6 commit 059d25e

27 files changed

+1972
-440
lines changed

dev/checkstyle/suppressions.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ information: Portions Copyright [yyyy] [name of copyright owner]
1818
1919
CDDL HEADER END
2020
21-
Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
21+
Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
2222
Portions Copyright (c) 2018-2020, Chris Fraire <[email protected]>.
2323
2424
-->
@@ -43,7 +43,7 @@ Portions Copyright (c) 2018-2020, Chris Fraire <[email protected]>.
4343
|Context\.java|HistoryContext\.java|Suggester\.java|
4444
|ProjectHelperTestBase\.java|SearchHelper\.java" />
4545

46-
<suppress checks="FileLength" files="RuntimeEnvironment\.java" />
46+
<suppress checks="FileLength" files="RuntimeEnvironment\.java|IndexDatabase\.java" />
4747

4848
<suppress checks="MethodLength" files="Indexer\.java|IndexDatabase\.java|AuthorizationFrameworkTest\.java" />
4949

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,8 @@ public final class Configuration {
300300
private int connectTimeout = -1; // connect timeout in seconds
301301
private int apiTimeout = -1; // API timeout in seconds
302302

303+
private boolean historyBasedReindex;
304+
303305
/*
304306
* types of handling history for remote SCM repositories:
305307
* ON - index history and display it in webapp
@@ -576,6 +578,7 @@ public Configuration() {
576578
setTagsEnabled(false);
577579
//setUserPage("http://www.myserver.org/viewProfile.jspa?username=");
578580
// Set to empty string so we can append it to the URL unconditionally later.
581+
setHistoryBasedReindex(true);
579582
setUserPageSuffix("");
580583
setWebappLAF("default");
581584
// webappCtags is default(boolean)
@@ -1412,6 +1415,14 @@ public void setApiTimeout(int apiTimeout) {
14121415
this.apiTimeout = apiTimeout;
14131416
}
14141417

1418+
public boolean isHistoryBasedReindex() {
1419+
return historyBasedReindex;
1420+
}
1421+
1422+
public void setHistoryBasedReindex(boolean flag) {
1423+
historyBasedReindex = flag;
1424+
}
1425+
14151426
/**
14161427
* Write the current configuration to a file.
14171428
*
@@ -1524,4 +1535,45 @@ private static Configuration decodeObject(InputStream in) throws IOException {
15241535

15251536
return conf;
15261537
}
1538+
1539+
public static class ConfigurationException extends Exception {
1540+
static final long serialVersionUID = -1;
1541+
1542+
public ConfigurationException(String message) {
1543+
super(message);
1544+
}
1545+
}
1546+
1547+
/**
1548+
* Check if configuration is populated and self-consistent.
1549+
* @throws ConfigurationException on error
1550+
*/
1551+
public void checkConfiguration() throws ConfigurationException {
1552+
1553+
if (getSourceRoot() == null) {
1554+
throw new ConfigurationException("Source root is not specified.");
1555+
}
1556+
1557+
if (getDataRoot() == null) {
1558+
throw new ConfigurationException("Data root is not specified.");
1559+
}
1560+
1561+
if (!new File(getSourceRoot()).canRead()) {
1562+
throw new ConfigurationException("Source root directory '" + getSourceRoot() + "' must be readable.");
1563+
}
1564+
1565+
if (!new File(getDataRoot()).canWrite()) {
1566+
throw new ConfigurationException("Data root directory '" + getDataRoot() + "' must be writable.");
1567+
}
1568+
1569+
if (!isHistoryEnabled() && isHistoryBasedReindex()) {
1570+
LOGGER.log(Level.INFO, "History based reindex is on, however history is off. " +
1571+
"History has to be enabled for history based reindex.");
1572+
}
1573+
1574+
if (!isHistoryCache() && isHistoryBasedReindex()) {
1575+
LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " +
1576+
"History cache has to be enabled for history based reindex.");
1577+
}
1578+
}
15271579
}

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2018, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.configuration;
@@ -34,6 +34,7 @@
3434
import java.util.logging.Logger;
3535
import java.util.regex.PatternSyntaxException;
3636

37+
import org.jetbrains.annotations.VisibleForTesting;
3738
import org.opengrok.indexer.logger.LoggerFactory;
3839
import org.opengrok.indexer.util.ClassUtil;
3940
import org.opengrok.indexer.util.ForbiddenSymlinkException;
@@ -99,6 +100,11 @@ public class Project implements Comparable<Project>, Nameable, Serializable {
99100
*/
100101
private boolean indexed = false;
101102

103+
/**
104+
* This flag sets per-project reindex based on traversing SCM history.
105+
*/
106+
private Boolean historyBasedReindex = null;
107+
102108
/**
103109
* Set of groups which match this project.
104110
*/
@@ -289,6 +295,28 @@ public void setMergeCommitsEnabled(boolean flag) {
289295
this.mergeCommitsEnabled = flag;
290296
}
291297

298+
/**
299+
* @return true if this project is reindexed based on traversing SCM history.
300+
*/
301+
public boolean isHistoryBasedReindex() {
302+
return historyBasedReindex != null && historyBasedReindex;
303+
}
304+
305+
/**
306+
* @param flag true if project should be reindexed based on traversing SCM history, false otherwise.
307+
*/
308+
public void setHistoryBasedReindex(boolean flag) {
309+
this.historyBasedReindex = flag;
310+
}
311+
312+
@VisibleForTesting
313+
public void clearProperties() {
314+
historyBasedReindex = null;
315+
mergeCommitsEnabled = null;
316+
historyEnabled = null;
317+
handleRenamedFiles = null;
318+
}
319+
292320
/**
293321
* Return groups where this project belongs.
294322
*
@@ -436,6 +464,10 @@ public final void completeWithDefaults() {
436464
if (reviewPattern == null) {
437465
setReviewPattern(env.getReviewPattern());
438466
}
467+
468+
if (historyBasedReindex == null) {
469+
setHistoryBasedReindex(env.isHistoryBasedReindex());
470+
}
439471
}
440472

441473
/**
@@ -476,8 +508,7 @@ public static Project getProject(String path) {
476508
* Get the project for a specific file.
477509
*
478510
* @param file the file to lookup
479-
* @return the project that this file belongs to (or null if the file
480-
* doesn't belong to a project)
511+
* @return the project that this file belongs to (or {@code null} if the file doesn't belong to a project)
481512
*/
482513
public static Project getProject(File file) {
483514
Project ret = null;

opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.util.Collection;
3737
import java.util.Collections;
3838
import java.util.Date;
39+
import java.util.HashMap;
3940
import java.util.HashSet;
4041
import java.util.List;
4142
import java.util.Map;
@@ -62,8 +63,10 @@
6263
import org.apache.lucene.store.Directory;
6364
import org.apache.lucene.store.FSDirectory;
6465
import org.apache.lucene.util.NamedThreadFactory;
66+
import org.jetbrains.annotations.VisibleForTesting;
6567
import org.opengrok.indexer.authorization.AuthorizationFramework;
6668
import org.opengrok.indexer.authorization.AuthorizationStack;
69+
import org.opengrok.indexer.history.FileCollector;
6770
import org.opengrok.indexer.history.HistoryGuru;
6871
import org.opengrok.indexer.history.RepositoryInfo;
6972
import org.opengrok.indexer.index.IndexDatabase;
@@ -137,6 +140,12 @@ public List<String> getSubFiles() {
137140

138141
private final List<String> subFiles = new ArrayList<>();
139142

143+
/**
144+
* Maps project name to FileCollector object. This is used to pass the list of files acquired when
145+
* generating history cache in the first phase of indexing to the second phase of indexing.
146+
*/
147+
private final Map<String, FileCollector> fileCollectorMap = new HashMap<>();
148+
140149
/**
141150
* Creates a new instance of RuntimeEnvironment. Private to ensure a
142151
* singleton anti-pattern.
@@ -465,7 +474,7 @@ public List<Project> getProjectList() {
465474
/**
466475
* Get project map.
467476
*
468-
* @return a Map with all of the projects
477+
* @return a Map with all the projects
469478
*/
470479
public Map<String, Project> getProjects() {
471480
return syncReadConfiguration(Configuration::getProjects);
@@ -1417,6 +1426,27 @@ public void setConnectTimeout(int connectTimeout) {
14171426
syncWriteConfiguration(connectTimeout, Configuration::setConnectTimeout);
14181427
}
14191428

1429+
public boolean isHistoryBasedReindex() {
1430+
return syncReadConfiguration(Configuration::isHistoryBasedReindex);
1431+
}
1432+
1433+
public void setHistoryBasedReindex(boolean flag) {
1434+
syncWriteConfiguration(flag, Configuration::setHistoryBasedReindex);
1435+
}
1436+
1437+
public FileCollector getFileCollector(String name) {
1438+
return fileCollectorMap.get(name);
1439+
}
1440+
1441+
public void setFileCollector(String name, FileCollector fileCollector) {
1442+
fileCollectorMap.put(name, fileCollector);
1443+
}
1444+
1445+
@VisibleForTesting
1446+
public void clearFileCollector() {
1447+
fileCollectorMap.clear();
1448+
}
1449+
14201450
/**
14211451
* Read a configuration file and set it as the current configuration.
14221452
*
@@ -1491,7 +1521,8 @@ public void writeConfiguration(String host) throws IOException, InterruptedExcep
14911521
* Project with some repository information is considered as a repository
14921522
* otherwise it is just a simple project.
14931523
*/
1494-
private void generateProjectRepositoriesMap() throws IOException {
1524+
@VisibleForTesting
1525+
public void generateProjectRepositoriesMap() throws IOException {
14951526
repository_map.clear();
14961527
for (RepositoryInfo r : getRepositories()) {
14971528
Project proj;
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.history;
24+
25+
import java.util.function.Consumer;
26+
27+
public abstract class ChangesetVisitor implements Consumer<RepositoryWithHistoryTraversal.ChangesetInfo> {
28+
boolean consumeMergeChangesets;
29+
30+
protected ChangesetVisitor(boolean consumeMergeChangesets) {
31+
this.consumeMergeChangesets = consumeMergeChangesets;
32+
}
33+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.history;
24+
25+
import java.util.Collection;
26+
import java.util.SortedSet;
27+
import java.util.TreeSet;
28+
29+
/**
30+
* This class is meant to collect files that were touched in some way by SCM update.
31+
* The visitor argument contains the files separated based on the type of modification performed,
32+
* however the consumer of this class is not interested in this classification.
33+
* This is because when incrementally indexing a bunch of changesets,
34+
* in one changeset a file may be deleted, only to be re-added in the next changeset etc.
35+
*/
36+
public class FileCollector extends ChangesetVisitor {
37+
private final SortedSet<String> files;
38+
39+
/**
40+
* Creates an empty collector. The file set uses natural {@code String} ordering, which is assumed to compare in the same way as {@code org.opengrok.indexer.index.IndexDatabase#FILENAME_COMPARATOR}.
41+
*/
42+
public FileCollector(boolean consumeMergeChangesets) {
43+
super(consumeMergeChangesets);
44+
files = new TreeSet<>();
45+
}
46+
47+
public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) {
48+
if (changesetInfo.renamedFiles != null) {
49+
files.addAll(changesetInfo.renamedFiles);
50+
}
51+
if (changesetInfo.files != null) {
52+
files.addAll(changesetInfo.files);
53+
}
54+
if (changesetInfo.deletedFiles != null) {
55+
files.addAll(changesetInfo.deletedFiles);
56+
}
57+
}
58+
59+
public SortedSet<String> getFiles() {
60+
return files;
61+
}
62+
63+
void addFiles(Collection<String> files) {
64+
this.files.addAll(files);
65+
}
66+
}

0 commit comments

Comments
 (0)