Skip to content

Commit 0920398

Browse files
authored
History collector vs merge changesets (#4221)
fixes #4027
1 parent f10696b commit 0920398

File tree

7 files changed

+177
-18
lines changed

7 files changed

+177
-18
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,12 @@ public void store(History history, Repository repository, String tillRevision) t
441441
}
442442

443443
HashMap<String, List<HistoryEntry>> map = new HashMap<>();
444-
latestRev = createFileMap(history, map);
444+
String fileMapLatestRev = createFileMap(history, map);
445+
if (history.getLatestRev() != null) {
446+
latestRev = history.getLatestRev();
447+
} else {
448+
latestRev = fileMapLatestRev;
449+
}
445450

446451
// File based history cache does not store files for individual changesets so strip them.
447452
history.strip();
@@ -563,7 +568,7 @@ public void storeRenamed(Set<String> renamedFiles, Repository repository, String
563568
LOGGER.log(Level.SEVERE, "latch exception", ex);
564569
}
565570
}
566-
LOGGER.log(Level.FINE, "Stored history for {0} renamed files in repository ''{1}''",
571+
LOGGER.log(Level.FINE, "Stored history for {0} renamed files in repository {1}",
567572
new Object[]{renamedFileHistoryCount.intValue(), repository});
568573
}
569574

opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -498,18 +498,13 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision
498498
commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage());
499499

500500
for (ChangesetVisitor visitor : visitors) {
501-
// Even though the repository itself is set (not) to consume the merge changesets,
502-
// it should be up to the visitor to have the say. This is because of the history based reindex.
503-
if (commit.getParentCount() > 1 && !visitor.consumeMergeChangesets) {
504-
continue;
505-
}
506-
507501
if (isDirectory) {
508502
SortedSet<String> files = new TreeSet<>();
509503
final Set<String> renamedFiles = new HashSet<>();
510504
final Set<String> deletedFiles = new HashSet<>();
511505
getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository);
512-
visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles));
506+
visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles,
507+
commit.getParentCount() > 1));
513508
} else {
514509
visitor.accept(new ChangesetInfo(commitInfo));
515510
}

opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2007, 2023, Oracle and/or its affiliates. All rights reserved.
2222
* Portions Copyright (c) 2019, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.history;
@@ -48,12 +48,16 @@ public class History implements Serializable {
4848
/** Entries in the log. The first entry is the most recent one. */
4949
private List<HistoryEntry> entries;
5050
/**
51-
* track renamed files so they can be treated in special way (for some
52-
* SCMs) during cache creation.
51+
* Track renamed files, so they can be treated in special way (for some SCMs) during cache creation.
5352
* These are relative to repository root.
5453
*/
5554
private final Set<String> renamedFiles;
5655

56+
/**
57+
* Revision of the newest change. Used in history cache.
58+
*/
59+
private String latestRev;
60+
5761
// revision to tag list. Individual tags are joined via TAGS_SEPARATOR.
5862
private Map<String, String> tags = new HashMap<>();
5963

@@ -75,6 +79,15 @@ public History() {
7579
this.renamedFiles = renamed;
7680
}
7781

82+
History(List<HistoryEntry> entries, Set<String> renamed, String latestRev) {
83+
this(entries, renamed);
84+
this.latestRev = latestRev;
85+
}
86+
87+
public String getLatestRev() {
88+
return latestRev;
89+
}
90+
7891
// Needed for serialization.
7992
public Map<String, String> getTags() {
8093
return tags;

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
2222
*/
2323
package org.opengrok.indexer.history;
2424

@@ -31,6 +31,8 @@ class HistoryCollector extends ChangesetVisitor {
3131
List<HistoryEntry> entries;
3232
Set<String> renamedFiles;
3333

34+
String latestRev;
35+
3436
HistoryCollector(boolean consumeMergeChangesets) {
3537
super(consumeMergeChangesets);
3638
entries = new ArrayList<>();
@@ -40,6 +42,17 @@ class HistoryCollector extends ChangesetVisitor {
4042
public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) {
4143
RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit;
4244

45+
// Make sure to record the revision even though this is a merge changeset
46+
// and the collector does not want to consume these.
47+
// The changesets are visited from newest to oldest, so record just the first one.
48+
if (latestRev == null) {
49+
latestRev = commit.revision;
50+
}
51+
52+
if (changesetInfo.isMerge != null && changesetInfo.isMerge && !consumeMergeChangesets) {
53+
return;
54+
}
55+
4356
// TODO: add a test for this
4457
String author;
4558
if (commit.authorEmail != null) {

opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
2222
*/
2323
package org.opengrok.indexer.history;
2424

@@ -69,6 +69,7 @@ public static class ChangesetInfo {
6969
public SortedSet<String> files;
7070
public Set<String> renamedFiles;
7171
public Set<String> deletedFiles;
72+
public Boolean isMerge;
7273

7374
ChangesetInfo(CommitInfo commit) {
7475
this.commit = commit;
@@ -78,10 +79,16 @@ public static class ChangesetInfo {
7879
}
7980

8081
ChangesetInfo(CommitInfo commit, SortedSet<String> files, Set<String> renamedFiles, Set<String> deletedFiles) {
82+
this(commit, files, renamedFiles, deletedFiles, null);
83+
}
84+
85+
ChangesetInfo(CommitInfo commit, SortedSet<String> files, Set<String> renamedFiles, Set<String> deletedFiles,
86+
Boolean isMerge) {
8187
this.commit = commit;
8288
this.files = files;
8389
this.renamedFiles = renamedFiles;
8490
this.deletedFiles = deletedFiles;
91+
this.isMerge = isMerge;
8592
}
8693
}
8794

@@ -106,7 +113,8 @@ public History getHistory(File file, String sinceRevision, String tillRevision,
106113

107114
HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled());
108115
traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector));
109-
History history = new History(historyCollector.entries, historyCollector.renamedFiles);
116+
History history = new History(historyCollector.entries, historyCollector.renamedFiles,
117+
historyCollector.latestRev);
110118

111119
// Assign tags to changesets they represent.
112120
if (RuntimeEnvironment.getInstance().isTagsEnabled() && hasFileBasedTags()) {
@@ -139,7 +147,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire
139147
visitors.add(fileCollector);
140148
}
141149
traverseHistory(directory, sinceRevision, null, null, visitors);
142-
History history = new History(historyCollector.entries, historyCollector.renamedFiles);
150+
History history = new History(historyCollector.entries, historyCollector.renamedFiles,
151+
historyCollector.latestRev);
143152

144153
// Assign tags to changesets they represent.
145154
if (env.isTagsEnabled() && hasFileBasedTags()) {
@@ -173,7 +182,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire
173182
visitors.add(fileCollector);
174183
}
175184
traverseHistory(directory, sinceRevision, tillRevision, null, visitors);
176-
History history = new History(historyCollector.entries, historyCollector.renamedFiles);
185+
History history = new History(historyCollector.entries, historyCollector.renamedFiles,
186+
historyCollector.latestRev);
177187

178188
// Assign tags to changesets they represent.
179189
if (env.isTagsEnabled() && hasFileBasedTags()) {
@@ -182,7 +192,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire
182192

183193
finishCreateCache(cache, history, tillRevision);
184194
sinceRevision = tillRevision;
185-
stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''",
195+
stat.report(LOGGER, Level.FINE, String.format("Finished chunk %d/%d of history cache for repository '%s'",
186196
++cnt, boundaryChangesetList.size(), this.getDirectoryName()));
187197
}
188198

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.history;
24+
25+
import org.eclipse.jgit.api.Git;
26+
import org.eclipse.jgit.api.ResetCommand;
27+
import org.junit.jupiter.api.AfterAll;
28+
import org.junit.jupiter.api.BeforeAll;
29+
import org.junit.jupiter.api.Test;
30+
import org.junit.jupiter.params.ParameterizedTest;
31+
import org.junit.jupiter.params.provider.ValueSource;
32+
import org.opengrok.indexer.configuration.RuntimeEnvironment;
33+
import org.opengrok.indexer.util.TestRepository;
34+
35+
import java.io.File;
36+
37+
import static org.junit.jupiter.api.Assertions.assertEquals;
38+
import static org.junit.jupiter.api.Assertions.assertFalse;
39+
import static org.junit.jupiter.api.Assertions.assertNotNull;
40+
41+
/**
42+
* Test that even though merge changesets are disabled in history cache,
43+
* they are accounted for w.r.t. the last changeset visited.
44+
* This prevents duplicate documents to be created in the index on subsequent
45+
* indexer runs.
46+
* Tested only for Git.
47+
*/
48+
public class TestHistoryCollectorVsMergeChangesets {
49+
private static TestRepository repository = new TestRepository();
50+
51+
private static final RuntimeEnvironment env = RuntimeEnvironment.getInstance();
52+
53+
private static final String lastRevision = "4d1b7cfb";
54+
55+
@BeforeAll
56+
public static void setUpClass() throws Exception {
57+
repository = new TestRepository();
58+
repository.create(TestHistoryCollectorVsMergeChangesets.class.getResourceAsStream(
59+
"/history/git-merge.zip"));
60+
env.setMergeCommitsEnabled(false);
61+
}
62+
63+
@AfterAll
64+
public static void tearDownClass() {
65+
repository.destroy();
66+
repository = null;
67+
}
68+
69+
@ParameterizedTest
70+
@ValueSource(booleans = {true, false})
71+
void testReindexWithHistoryBasedRepository(boolean usePerPartes) throws Exception {
72+
env.setHistoryCachePerPartesEnabled(usePerPartes);
73+
74+
File origRepositoryRoot = new File(repository.getSourceRoot(), "git-merge");
75+
File localPath = new File(repository.getSourceRoot(),
76+
"gitCloneTestHistoryCollector" + (usePerPartes ? "-perPartes" : ""));
77+
String cloneUrl = origRepositoryRoot.toURI().toString();
78+
try (Git gitClone = Git.cloneRepository()
79+
.setURI(cloneUrl)
80+
.setDirectory(localPath)
81+
.call()) {
82+
83+
final String intermediateRevision = "f3ddb4ba";
84+
85+
gitClone.reset().setMode(ResetCommand.ResetType.HARD).
86+
setRef(intermediateRevision).call();
87+
88+
// Reset hard to certain changeset.
89+
File repositoryRoot = gitClone.getRepository().getWorkTree();
90+
GitRepository repo = (GitRepository) RepositoryFactory.getRepository(repositoryRoot);
91+
assertFalse(repo.isMergeCommitsEnabled());
92+
93+
FileHistoryCache cache = new FileHistoryCache();
94+
cache.initialize();
95+
96+
repo.doCreateCache(cache, null, repositoryRoot);
97+
assertEquals(intermediateRevision, cache.getLatestCachedRevision(repo));
98+
99+
// Pull the remaining changesets from the origin.
100+
gitClone.pull().call();
101+
102+
repo.doCreateCache(cache, null, repositoryRoot);
103+
assertEquals(lastRevision, cache.getLatestCachedRevision(repo));
104+
}
105+
}
106+
107+
@Test
108+
void testCacheStore() throws Exception {
109+
File repositoryRoot = new File(repository.getSourceRoot(), "git-merge");
110+
GitRepository repo = (GitRepository) RepositoryFactory.getRepository(repositoryRoot);
111+
assertFalse(repo.isMergeCommitsEnabled());
112+
113+
FileHistoryCache cache = new FileHistoryCache();
114+
cache.initialize();
115+
116+
// Use the getHistory() + cache.store() instead of repo.doCreateCache(),
117+
// so that different code path for traverseHistory() is exercised.
118+
History history = repo.getHistory(repositoryRoot);
119+
assertNotNull(history);
120+
cache.store(history, repo);
121+
assertEquals(lastRevision, cache.getLatestCachedRevision(repo));
122+
}
123+
}
Binary file not shown.

0 commit comments

Comments
 (0)