Skip to content

Commit 1ba1e9e

Browse files
committed
make-authors.pl: Auto-generate the entire AUTHORS file
Update the script to auto-generate the entire AUTHORS file from two sources: 1. The existing AUTHORS file 2. The output from "git log --format=tformat:=tformat:'%aN <%aE>'" Merge these two together (which will preserve organization affiliations) and warn in two cases: 1. If a person has no organization affiliation 1. If the same email address appears for more than one person Signed-off-by: Jeff Squyres <[email protected]>
1 parent 7a25233 commit 1ba1e9e

File tree

1 file changed

+163
-98
lines changed

1 file changed

+163
-98
lines changed

contrib/dist/make-authors.pl

Lines changed: 163 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#!/usr/bin/env perl
22
#
3-
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
3+
# Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved.
44
#
55

66
use strict;
7+
78
use Data::Dumper;
89

910
# Ensure that we're in the root of a writeable Git clone
@@ -14,149 +15,213 @@
1415

1516
######################################################################
1617

18+
my $header_sep = "-----";
19+
my $unknown_org = "********* NO ORGANIZATION SET ********";
20+
21+
my $people;
22+
23+
######################################################################
24+
1725
# Run git log to get a list of committers
1826

19-
my $committers;
20-
open (GIT, "git log --pretty=format:%ae|") || die "Can't run 'git log'.";
27+
open (GIT, "git log --format=tformat:'%aN <%aE>'|") || die "Can't run 'git log'.";
2128
while (<GIT>) {
2229
chomp;
23-
m/^\s*([\S]+)\s*$/;
30+
m/^\s*(.+)\s+<(.+)>\s*$/;
31+
32+
if (!exists($people->{$1})) {
33+
# The person doesn't exist, so save a new entry
34+
$people->{$1} = {
35+
name => $1,
36+
org => $unknown_org,
37+
emails => {
38+
lc($2) => 1,
39+
}
40+
};
2441

25-
if (!exists($committers->{$1})) {
26-
$committers->{$1} = { };
27-
print "Found Git commit email: $1\n";
42+
43+
print "Found Git committer: $1 <$2>\n";
44+
} else {
45+
# The person already exists, so just add (or overwrite) this
46+
# email address
47+
$people->{$1}->{emails}->{$2} = 1;
2848
}
2949
}
3050
close(GIT);
3151

32-
# Read the existing AUTHORS file to get the header, footer, and Git
33-
# email ID -> (gecos, affiliation) mappings.
52+
######################################################################
53+
54+
# Read the existing AUTHORS file
3455

3556
my $header;
36-
my $footer;
3757

3858
print "Matching Git emails to existing names/affiliations...\n";
3959

60+
sub save {
61+
my $current = shift;
62+
63+
print "Saving person from AUTHORS: $current->{name}\n";
64+
65+
# We may overwrite an entry written from the git log, but that's
66+
# ok
67+
$people->{$current->{name}} = $current;
68+
}
69+
4070
open (AUTHORS, "AUTHORS") || die "Can't open AUTHORS file";
4171
my $in_header = 1;
42-
my $in_footer = 0;
72+
my $current = undef;
4373
while (<AUTHORS>) {
4474
chomp;
4575
my $line = $_;
4676

47-
# Slurp down header lines until we hit a line that begins with an
48-
# Git email
77+
# Slurp down header lines until we hit a line that begins with
78+
# $header_sep
4979
if ($in_header) {
50-
foreach my $git_email (keys(%{$committers})) {
51-
if ($line =~ /$git_email\s+/) {
52-
$in_header = 0;
53-
}
54-
}
55-
if ($in_header) {
56-
$header .= "$_\n";
80+
$header .= "$line\n";
81+
82+
if ($_ =~ /^$header_sep/) {
83+
$in_header = 0;
84+
85+
# There should be a blank line after this, too
86+
$header .= "\n";
5787
}
88+
next;
5889
}
5990

60-
# If we're in the body, parse to get the existing Git emails, gecos,
61-
# and affiliations
62-
if (!$in_header && !$in_footer) {
63-
64-
# Make sure we have a line that begins with an Git email;
65-
# otherwise, fall through to the footer.
66-
my $found = undef;
67-
my $git_email;
68-
foreach $git_email (keys(%{$committers})) {
69-
if ($line =~ /$git_email\s+/) {
70-
$found = $git_email;
71-
last;
72-
}
73-
}
74-
if (!$found) {
75-
$in_footer = 1;
91+
# Skip blank lines
92+
next
93+
if ($line =~ /^\s*$/);
94+
95+
# Format of body:
96+
#
97+
# NAME, Affiliation 1[, Affiliation 2[...]]
98+
# Email address 1
99+
# [Email address 2]
100+
# [...]
101+
# NAME, Affiliation 1[, Affiliation 2[...]]
102+
# Email address 1
103+
# [Email address 2]
104+
# [...]
105+
106+
# Found a new email address for an existing person
107+
if ($line =~ /^ /) {
108+
m/^ (.+)$/;
109+
$current->{emails}->{lc($1)} = 1;
110+
111+
next;
112+
} else {
113+
# Found a new person; save the old entry
114+
save($current)
115+
if (defined($current));
116+
117+
$current = undef;
118+
$current->{org} = $unknown_org;
119+
if ($line =~ m/^(.+?),\s+(.+)$/) {
120+
$current->{name} = $1;
121+
$current->{org} = $2;
76122
} else {
77-
$line =~ m/^$found\s+(.+?)\s{2,}(.+)$/;
78-
my $gecos = $1;
79-
my $aff = $2;
80-
81-
if ($gecos =~ /^\s+$/) {
82-
$gecos = "<UNKNOWN>";
83-
} else {
84-
$committers->{$found}->{gecos} = $gecos;
85-
}
86-
if ($aff =~ /^\s+$/) {
87-
$aff = "<UNKNOWN>";
88-
} else {
89-
$committers->{$found}->{affiliation} = $aff;
90-
}
91-
print "Git email $found matches: $gecos / $aff\n";
123+
$current->{name} = $line;
92124
}
93-
}
94125

95-
# If we're in the footer, just save all the lines
96-
if ($in_footer) {
97-
$footer .= "$_\n";
126+
next;
98127
}
99128
}
129+
130+
save($current)
131+
if (defined($current));
132+
100133
close(AUTHORS);
101134

102-
# Figure out the 3 column widths. The last line of the header
103-
# contains -'s for each of the columns.
135+
######################################################################
104136

105-
$header =~ m/\n([\-\s]+?)$/m;
106-
my $div_line = $1;
107-
my @divs = split(/ /, $div_line);
108-
my $id_col = length($divs[0]);
109-
my $gecos_col = length($divs[1]);
110-
my $aff_col = length($divs[2]);
137+
# Output a new AUTHORS file
111138

112-
# Print out a new AUTHORS file
113139
open (AUTHORS, ">AUTHORS.new") || die "Can't write to AUTHORS file";
140+
114141
print AUTHORS $header;
115-
my $i;
116-
my $have_unknowns = 0;
117-
foreach my $git_email (sort(keys(%${committers}))) {
118-
# Skip the automated accounts
119-
next
120-
if ($git_email eq "no-author\@open-mpi.org" ||
121-
$git_email eq "mpiteam\@open-mpi.org");
122-
123-
print AUTHORS $git_email;
124-
$i = length($git_email);
125-
while ($i <= $id_col) {
126-
print AUTHORS ' ';
127-
++$i;
142+
143+
my @people_with_unknown_orgs;
144+
my $email_dups;
145+
146+
my @sorted_people = sort(keys(%{$people}));
147+
foreach my $p (@sorted_people) {
148+
print AUTHORS $p;
149+
if (exists($people->{$p}->{org})) {
150+
print AUTHORS ", $people->{$p}->{org}";
151+
152+
# Record this so that we can warn about it
153+
push(@people_with_unknown_orgs, $p)
154+
if ($people->{$p}->{org} eq $unknown_org);
128155
}
156+
print AUTHORS "\n";
157+
158+
foreach my $e (sort(keys(%{$people->{$p}->{emails}}))) {
159+
# Sanity check: make sure this email address does not show up
160+
# with any other person/name
161+
my $dup;
162+
foreach my $p2 (@sorted_people) {
163+
next
164+
if ($p eq $p2);
165+
166+
foreach my $e2 (keys(%{$people->{$p2}->{emails}})) {
167+
if ($e eq $e2) {
168+
$dup = $p2;
169+
170+
# Record this so that we can warn about it
171+
if ($p le $p2) {
172+
$email_dups->{$p} = $p2;
173+
} else {
174+
$email_dups->{$p2} = $p;
175+
}
176+
last;
177+
}
178+
}
129179

130-
# if we have gecos/affiliation, print them. Otherwise, just end
131-
# the line here
132-
if ((exists($committers->{$git_email}->{gecos}) &&
133-
$committers->{$git_email}->{gecos} !~ /^\s+$/) ||
134-
(exists($committers->{$git_email}->{affiliation}) &&
135-
$committers->{$git_email}->{affiliation} !~ /^\s+$/)) {
136-
print AUTHORS $committers->{$git_email}->{gecos};
137-
$i = length($committers->{$git_email}->{gecos});
138-
while ($i <= $gecos_col) {
139-
print AUTHORS ' ';
140-
++$i;
180+
last
181+
if (defined($dup));
141182
}
142183

143-
print AUTHORS $committers->{$git_email}->{affiliation}
144-
if (exists($committers->{$git_email}->{affiliation}));
145-
} else {
146-
$have_unknowns = 1;
184+
print AUTHORS " $e";
185+
print AUTHORS " (**** DUPLICATE EMAIL ADDRESS WITH $dup ***)"
186+
if (defined($dup));
187+
print AUTHORS "\n";
147188
}
148-
print AUTHORS "\n";
149189
}
150-
print AUTHORS $footer;
151190
close(AUTHORS);
152191

192+
# We have a new AUTHORS file! Replace the old one.
153193
unlink("AUTHORS");
154194
rename("AUTHORS.new", "AUTHORS");
155195

156196
print "New AUTHORS file written.\n";
157-
if ($have_unknowns) {
158-
print "*** WARNING: There were Git committers with unknown real names and/or\n*** affiliations. You *MUST* edit the AUTHORS file to fill them in!\n";
159-
} else {
160-
print "All Git emails were matched! No need to hand-edit the AUTHORS file.\n";
197+
198+
######################################################################
199+
200+
# Output any relevant warnings
201+
202+
my $warned = 0;
203+
if ($#people_with_unknown_orgs >= 0) {
204+
$warned = 1;
205+
print "\n*** WARNING: The following people have unspecified organiations:\n";
206+
foreach my $p (@people_with_unknown_orgs) {
207+
print "*** $p\n";
208+
}
209+
}
210+
211+
my @k = sort(keys(%{$email_dups}));
212+
if ($#k >= 0) {
213+
$warned = 1;
214+
print "\n*** WARNING: The following people had the same email address:\n";
215+
foreach my $p (@k) {
216+
print "*** $p, $email_dups->{$p}\n";
217+
}
218+
}
219+
220+
if ($warned) {
221+
print "
222+
*******************************************************************************
223+
*** YOU SHOULD EDIT THE .mailmap AND/OR AUTHORS FILE TO RESOLVE THESE WARNINGS!
224+
*******************************************************************************\n";
161225
}
162226

227+
exit($warned);

0 commit comments

Comments
 (0)