Skip to content

Commit f6bdcf6

Browse files
idodeclareVladimir Kotal
authored andcommitted
Fix #2986 : Use louie0817's bulk log-tags command
1 parent 3da17c7 commit f6bdcf6

File tree

5 files changed

+369
-97
lines changed

5 files changed

+369
-97
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java

Lines changed: 55 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
* Portions Copyright (c) 2019, Krystof Tulinger <[email protected]>.
2424
*/
2525
package org.opengrok.indexer.history;
@@ -36,7 +36,6 @@
3636
import java.util.ArrayList;
3737
import java.util.Arrays;
3838
import java.util.Date;
39-
import java.util.LinkedList;
4039
import java.util.List;
4140
import java.util.TreeSet;
4241
import java.util.logging.Level;
@@ -345,8 +344,7 @@ String findOriginalName(String fullpath, String changeset)
345344
int status = executor.exec();
346345

347346
String originalFile = null;
348-
try (BufferedReader in = new BufferedReader(
349-
new InputStreamReader(executor.getOutputStream()))) {
347+
try (BufferedReader in = new BufferedReader(newLogReader(executor.getOutputStream()))) {
350348
String line;
351349
String rev = null;
352350
Matcher m;
@@ -385,7 +383,7 @@ String findOriginalName(String fullpath, String changeset)
385383
* Get first revision of given file without following renames.
386384
* @param fullpath file path to get first revision of
387385
*/
388-
private String getFirstRevision(String fullpath) throws IOException {
386+
private String getFirstRevision(String fullpath) {
389387
String[] argv = {
390388
ensureCommand(CMD_PROPERTY_KEY, CMD_FALLBACK),
391389
"rev-list",
@@ -568,68 +566,74 @@ boolean hasFileBasedTags() {
568566
return true;
569567
}
570568

571-
private TagEntry buildTagEntry(File directory, String tag, boolean interactive) {
572-
ArrayList<String> argv = new ArrayList<>();
573-
ensureCommand(CMD_PROPERTY_KEY, CMD_FALLBACK);
574-
argv.add(RepoCommand);
575-
argv.add("log");
576-
argv.add("--format=commit:%H%nDate:%at");
577-
argv.add("-n");
578-
argv.add("1");
579-
argv.add(tag);
580-
argv.add("--");
581-
582-
Executor executor = new Executor(argv, directory, interactive ?
583-
RuntimeEnvironment.getInstance().getInteractiveCommandTimeout() :
584-
RuntimeEnvironment.getInstance().getCommandTimeout());
585-
GitTagParser parser = new GitTagParser(tag);
586-
executor.exec(true, parser);
587-
return parser.getEntries().first();
588-
}
589-
590569
@Override
591570
protected void buildTagList(File directory, boolean interactive) {
592571
this.tagList = new TreeSet<>();
572+
573+
/*
574+
* Bulk log-tags command courtesy of GitHub's louie0817.
575+
*/
593576
ArrayList<String> argv = new ArrayList<>();
594-
LinkedList<String> tagsList = new LinkedList<>();
595577
ensureCommand(CMD_PROPERTY_KEY, CMD_FALLBACK);
596578
argv.add(RepoCommand);
597-
argv.add("tag");
598-
579+
argv.add("log");
580+
argv.add("--tags");
581+
argv.add("--simplify-by-decoration");
582+
argv.add("--pretty=%H:%at:%D:");
583+
599584
Executor executor = new Executor(argv, directory, interactive ?
600585
RuntimeEnvironment.getInstance().getInteractiveCommandTimeout() :
601586
RuntimeEnvironment.getInstance().getCommandTimeout());
602-
int status = executor.exec();
603-
604-
try {
605-
// First we have to obtain list of all tags, and put it aside
606-
// Otherwise we can't use git to get date & hash for each tag
607-
try (BufferedReader in = new BufferedReader(new InputStreamReader(
608-
executor.getOutputStream()))) {
609-
String line;
610-
while ((line = in.readLine()) != null) {
611-
tagsList.add(line);
612-
}
613-
}
614-
} catch (IOException e) {
615-
LOGGER.log(Level.WARNING,
616-
"Failed to read tag list: {0}", e.getMessage());
617-
this.tagList = null;
587+
int status = executor.exec(true, new GitTagParser(this.tagList));
588+
if (LOGGER.isLoggable(Level.FINEST)) {
589+
LOGGER.log(Level.FINEST, "Read tags count={0} for {1}",
590+
new Object[] {tagList.size(), directory});
618591
}
619592

620593
if (status != 0) {
621594
LOGGER.log(Level.WARNING,
622595
"Failed to get tags for: \"{0}\" Exit code: {1}",
623596
new Object[]{directory.getAbsolutePath(), String.valueOf(status)});
624-
this.tagList = null;
625-
}
626-
627-
// Now get hash & date for each tag.
628-
for (String tag : tagsList) {
629-
TagEntry tagEntry = buildTagEntry(directory, tag, interactive);
630-
// Reverse the order of the list
631-
this.tagList.add(tagEntry);
597+
// In case of partial success, do not null-out tagList here.
632598
}
599+
600+
/*
601+
* Note that the former algorithm ran `git tag` first. `git tag` lists
602+
* tags in lexicographic order, so the former algorithm with its final
603+
* reverse() would have produced a result that was reverse-
604+
* lexicographically ordered.
605+
*
606+
* Repository technically relies on the tag list to be reverse-ancestor
607+
* ordered.
608+
*
609+
* This algorithm by using `git log --tags` results in reverse-ancestor
610+
* order. But there is also a technicality that Repository
611+
* searches ancestry by using TagEntry.compareTo(HistoryEntry). For
612+
* an SCM that uses "linear revision numbering" (e.g. SVN or Mercurial),
613+
* the search is reliable.
614+
*
615+
* For GitTagEntry.compareTo(HistoryEntry) that compares by date, the
616+
* search can technically terminate too early if ancestor-order is not
617+
* monotonic by date (which Git allows with no complaint).
618+
*
619+
* Linus Torvalds: [When asking] "'can commit X be an ancestor of
620+
* commit Y' (as a way to basically limit certain algorithms from
621+
* having to walk all the way down). We've used commit dates for it,
622+
* and realistically it really has worked very well. But it was always
623+
* a broken heuristic."
624+
*
625+
* "I think the lack of [generation numbers] is literally the only real
626+
* design mistake we have [in Git]."
627+
*
628+
* "We discussed adding generation numbers about 6 years ago [in 2005].
629+
* We clearly *should* have done it. Instead, we went with the hacky
630+
* `let's use commit time', that everybody really knew was technically
631+
* wrong, and was a hack, but avoided the need."
632+
*
633+
* If Git ever gets standard generation numbers,
634+
* GitTagEntry.compareTo() should be revised to work reliably akin to
635+
* an SCM that uses "linear revision numbering."
636+
*/
633637
}
634638

635639
@Override

opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitTagEntry.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,23 @@
1919

2020
/*
2121
* Copyright (c) 2012, 2018 Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2020, Chris Fraire <[email protected]>.
2223
*/
2324
package org.opengrok.indexer.history;
2425

2526
import java.util.Date;
2627

2728
/**
2829
* Git specific tag class with ability to compare itself with generic
29-
* HistoryEntry.
30+
* {@link HistoryEntry}.
3031
*
3132
* @author Stanislav Kozina
3233
*/
33-
public class GitTagEntry extends TagEntry {
34+
class GitTagEntry extends TagEntry {
3435

35-
private final String hash;
36+
final String hash;
3637

37-
public GitTagEntry(String hash, Date date, String tags) {
38+
GitTagEntry(String hash, Date date, String tags) {
3839
super(date, tags);
3940
this.hash = hash;
4041
}

opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitTagParser.java

Lines changed: 74 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,72 +19,106 @@
1919

2020
/*
2121
* Copyright (c) 2018, 2019 Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2020, Chris Fraire <[email protected]>.
2223
*/
2324
package org.opengrok.indexer.history;
2425

2526
import java.io.BufferedReader;
2627
import java.io.IOException;
2728
import java.io.InputStream;
28-
import java.io.InputStreamReader;
29+
import java.util.ArrayList;
2930
import java.util.Date;
3031
import java.util.TreeSet;
32+
import java.util.logging.Level;
33+
import java.util.logging.Logger;
34+
import java.util.regex.Matcher;
35+
import java.util.regex.Pattern;
36+
37+
import org.opengrok.indexer.logger.LoggerFactory;
3138
import org.opengrok.indexer.util.Executor;
3239

3340
/**
3441
* handles parsing the output of the {@code git log} command
3542
* into a set of tag entries.
3643
*/
37-
public class GitTagParser implements Executor.StreamHandler {
44+
class GitTagParser implements Executor.StreamHandler {
45+
46+
private static final Logger LOGGER = LoggerFactory.getLogger(GitTagParser.class);
47+
3848
/**
39-
* Store tag entries created by {@link processStream()}.
49+
* E.g. as follows and producing a single capture, $1, for a tag name.
50+
* <p>506e0a1ddf50341a0603af27ecc254ccb72d7dcb:1473681887:tag: 0.12.1.6, origin/0.12-stable:
51+
* <p>d305482d0acf552ccd290d6133a52547b8da16be:1427209918:tag: 0.12.1.5:
4052
*/
41-
private final TreeSet<TagEntry> entries = new TreeSet<>();
42-
43-
private final String tags;
44-
45-
GitTagParser(String tags) {
46-
this.tags = tags;
47-
}
48-
53+
private static final Pattern PRETTY_TAG_MATCHER =
54+
Pattern.compile("tag:\\s+(\\S[^,:]+)(?:,\\s+|:)");
55+
4956
/**
50-
* Returns the set of entries that has been created.
51-
*
52-
* @return entries a set of tag entries
57+
* Stores the externally provided set.
5358
*/
54-
public TreeSet<TagEntry> getEntries() {
55-
return entries;
59+
private final TreeSet<TagEntry> entries;
60+
61+
GitTagParser(TreeSet<TagEntry> entries) {
62+
this.entries = entries;
5663
}
5764

5865
@Override
5966
public void processStream(InputStream input) throws IOException {
60-
String hash = null;
61-
Date date = null;
62-
63-
try (BufferedReader in = new BufferedReader(new InputStreamReader(input))) {
67+
ArrayList<String> tagNames = new ArrayList<>();
68+
69+
try (BufferedReader reader = new BufferedReader(GitRepository.newLogReader(input))) {
6470
String line;
65-
while ((line = in.readLine()) != null) {
66-
if (line.startsWith("commit")) {
67-
String[] parts = line.split(":");
68-
if (parts.length < 2) {
69-
throw new IOException("Tag line contains more than 2 columns: " + line);
70-
}
71-
hash = parts[1];
71+
while ((line = reader.readLine()) != null) {
72+
String originalLine = line;
73+
String hash;
74+
Date date;
75+
int i;
76+
77+
/*
78+
* E.g.
79+
* 506e0a1ddf50341a0603af27ecc254ccb72d7dcb:1473681887:tag: 0.12.1.6, origin/0.12-stable:
80+
* d305482d0acf552ccd290d6133a52547b8da16be:1427209918:tag: 0.12.1.5:
81+
*/
82+
if ((i = line.indexOf(":")) == -1) {
83+
LOGGER.log(Level.WARNING, "Bad tags log for %H: {0}", originalLine);
84+
continue;
7285
}
73-
if (line.startsWith("Date")) {
74-
String[] parts = line.split(":");
75-
if (parts.length < 2) {
76-
throw new IOException("Tag line contains more than 2 columns: " + line);
77-
}
78-
date = new Date((long) (Integer.parseInt(parts[1])) * 1000);
86+
hash = line.substring(0, i);
87+
line = line.substring(i + 1);
88+
89+
if ((i = line.indexOf(":")) == -1) {
90+
LOGGER.log(Level.WARNING, "Bad tags log for %at: {0}", originalLine);
91+
continue;
7992
}
80-
}
81-
}
93+
String timestamp = line.substring(0, i);
94+
line = line.substring(i + 1);
95+
date = new Date((long) (Integer.parseInt(timestamp)) * 1000);
8296

83-
// Git can have tags not pointing to any commit, but tree instead
84-
// Lets use Unix timestamp of 0 for such commits
85-
if (date == null) {
86-
date = new Date(0);
97+
/*
98+
* GitHub's louie0817 identified a problem where multiple tags
99+
* for the same Git commit were not recognized since OpenGrok's
100+
* TagEntry equals() compares solely the date when the "repo
101+
* does not use linear numbering," and formerly OpenGrok was
102+
* parsing individual tags as separate entries. With
103+
* louie0817's suggested bulk command, the tag names appear on
104+
* the same line and can therefore be attached to the same
105+
* entry before inserting to the map.
106+
*/
107+
tagNames.clear();
108+
Matcher m = PRETTY_TAG_MATCHER.matcher(line);
109+
while (m.find()) {
110+
tagNames.add(m.group(1)); // $1
111+
}
112+
if (!tagNames.isEmpty()) {
113+
/*
114+
* See Repository assignTagsInHistory() where multiple
115+
* identified tags are joined with comma-space.
116+
*/
117+
String joinedTagNames = String.join(", ", tagNames);
118+
GitTagEntry tagEntry = new GitTagEntry(hash, date, joinedTagNames);
119+
entries.add(tagEntry);
120+
}
121+
}
87122
}
88-
entries.add(new GitTagEntry(hash, date, tags));
89123
}
90124
}

opengrok-indexer/src/main/java/org/opengrok/indexer/history/TagEntry.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2012, 2018 Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.history;
2525

@@ -93,7 +93,7 @@ public int compareTo(TagEntry that) {
9393
}
9494

9595
if (this.revision != NOREV) {
96-
return ((Integer) this.revision).compareTo(that.revision);
96+
return Integer.compare(this.revision, that.revision);
9797
}
9898
assert this.date != null : "date == null";
9999
return this.date.compareTo(that.date);

0 commit comments

Comments
 (0)