|  | 
| 1 | 1 | #!/usr/bin/env perl | 
| 2 | 2 | # | 
| 3 |  | -# Copyright (c) 2008 Cisco Systems, Inc.  All rights reserved. | 
|  | 3 | +# Copyright (c) 2008-2016 Cisco Systems, Inc.  All rights reserved. | 
| 4 | 4 | # | 
| 5 | 5 | 
 | 
| 6 | 6 | use strict; | 
|  | 7 | + | 
| 7 | 8 | use Data::Dumper; | 
| 8 | 9 | 
 | 
| 9 | 10 | # Ensure that we're in the root of a writeable Git clone | 
|  | 
| 14 | 15 | 
 | 
| 15 | 16 | ###################################################################### | 
| 16 | 17 | 
 | 
|  | 18 | +my $header_sep = "-----"; | 
|  | 19 | +my $unknown_org = "********* NO ORGANIZATION SET ********"; | 
|  | 20 | + | 
|  | 21 | +my $people; | 
|  | 22 | + | 
|  | 23 | +###################################################################### | 
|  | 24 | + | 
| 17 | 25 | # Run git log to get a list of committers | 
| 18 | 26 | 
 | 
| 19 |  | -my $committers; | 
| 20 |  | -open (GIT, "git log --pretty=format:%ae|") || die "Can't run 'git log'."; | 
|  | 27 | +open (GIT, "git log --format=tformat:'%aN <%aE>'|") || die "Can't run 'git log'."; | 
| 21 | 28 | while (<GIT>) { | 
| 22 | 29 |     chomp; | 
| 23 |  | -    m/^\s*([\S]+)\s*$/; | 
|  | 30 | +    m/^\s*(.+)\s+<(.+)>\s*$/; | 
|  | 31 | + | 
|  | 32 | +    if (!exists($people->{$1})) { | 
|  | 33 | +        # The person doesn't exist, so save a new entry | 
|  | 34 | +        $people->{$1} = { | 
|  | 35 | +            name => $1, | 
|  | 36 | +            org => $unknown_org, | 
|  | 37 | +            emails => { | 
|  | 38 | +                lc($2) => 1, | 
|  | 39 | +            } | 
|  | 40 | +        }; | 
| 24 | 41 | 
 | 
| 25 |  | -    if (!exists($committers->{$1})) { | 
| 26 |  | -        $committers->{$1} = { }; | 
| 27 |  | -        print "Found Git commit email: $1\n"; | 
|  | 42 | + | 
|  | 43 | +        print "Found Git committer: $1 <$2>\n"; | 
|  | 44 | +    } else { | 
|  | 45 | +        # The person already exists, so just add (or overwrite) this | 
|  | 46 | +        # email address | 
|  | 47 | +        $people->{$1}->{emails}->{$2} = 1; | 
| 28 | 48 |     } | 
| 29 | 49 | } | 
| 30 | 50 | close(GIT); | 
| 31 | 51 | 
 | 
| 32 |  | -# Read the existing AUTHORS file to get the header, footer, and Git | 
| 33 |  | -# email ID -> (gecos, affiliation) mappings. | 
|  | 52 | +###################################################################### | 
|  | 53 | + | 
|  | 54 | +# Read the existing AUTHORS file | 
| 34 | 55 | 
 | 
| 35 | 56 | my $header; | 
| 36 |  | -my $footer; | 
| 37 | 57 | 
 | 
| 38 | 58 | print "Matching Git emails to existing names/affiliations...\n"; | 
| 39 | 59 | 
 | 
|  | 60 | +sub save { | 
|  | 61 | +    my $current = shift; | 
|  | 62 | + | 
|  | 63 | +    print "Saving person from AUTHORS: $current->{name}\n"; | 
|  | 64 | + | 
|  | 65 | +    # We may overwrite an entry written from the git log, but that's | 
|  | 66 | +    # ok | 
|  | 67 | +    $people->{$current->{name}} = $current; | 
|  | 68 | +} | 
|  | 69 | + | 
| 40 | 70 | open (AUTHORS, "AUTHORS") || die "Can't open AUTHORS file"; | 
| 41 | 71 | my $in_header = 1; | 
| 42 |  | -my $in_footer = 0; | 
|  | 72 | +my $current = undef; | 
| 43 | 73 | while (<AUTHORS>) { | 
| 44 | 74 |     chomp; | 
| 45 | 75 |     my $line = $_; | 
| 46 | 76 | 
 | 
| 47 |  | -    # Slurp down header lines until we hit a line that begins with an | 
| 48 |  | -    # Git email | 
|  | 77 | +    # Slurp down header lines until we hit a line that begins with | 
|  | 78 | +    # $header_sep | 
| 49 | 79 |     if ($in_header) { | 
| 50 |  | -        foreach my $git_email (keys(%{$committers})) { | 
| 51 |  | -            if ($line =~ /$git_email\s+/) { | 
| 52 |  | -                $in_header = 0; | 
| 53 |  | -            } | 
| 54 |  | -        } | 
| 55 |  | -        if ($in_header) { | 
| 56 |  | -            $header .= "$_\n"; | 
|  | 80 | +        $header .= "$line\n"; | 
|  | 81 | + | 
|  | 82 | +        if ($_ =~ /^$header_sep/) { | 
|  | 83 | +            $in_header = 0; | 
|  | 84 | + | 
|  | 85 | +            # There should be a blank line after this, too | 
|  | 86 | +            $header .= "\n"; | 
| 57 | 87 |         } | 
|  | 88 | +        next; | 
| 58 | 89 |     } | 
| 59 | 90 | 
 | 
| 60 |  | -    # If we're in the body, parse to get the existing Git emails, gecos, | 
| 61 |  | -    # and affiliations | 
| 62 |  | -    if (!$in_header && !$in_footer) { | 
| 63 |  | - | 
| 64 |  | -        # Make sure we have a line that begins with an Git email; | 
| 65 |  | -        # otherwise, fall through to the footer. | 
| 66 |  | -        my $found = undef; | 
| 67 |  | -        my $git_email; | 
| 68 |  | -        foreach $git_email (keys(%{$committers})) { | 
| 69 |  | -            if ($line =~ /$git_email\s+/) { | 
| 70 |  | -                $found = $git_email; | 
| 71 |  | -                last; | 
| 72 |  | -            } | 
| 73 |  | -        } | 
| 74 |  | -        if (!$found) { | 
| 75 |  | -            $in_footer = 1; | 
|  | 91 | +    # Skip blank lines | 
|  | 92 | +    next | 
|  | 93 | +        if ($line =~ /^\s*$/); | 
|  | 94 | + | 
|  | 95 | +    # Format of body: | 
|  | 96 | +    # | 
|  | 97 | +    # NAME, Affiliation 1[, Affiliation 2[...]] | 
|  | 98 | +    #   Email address 1 | 
|  | 99 | +    #   [Email address 2] | 
|  | 100 | +    #   [...] | 
|  | 101 | +    # NAME, Affiliation 1[, Affiliation 2[...]] | 
|  | 102 | +    #   Email address 1 | 
|  | 103 | +    #   [Email address 2] | 
|  | 104 | +    #   [...] | 
|  | 105 | + | 
|  | 106 | +    # Found a new email address for an existing person | 
|  | 107 | +    if ($line =~ /^  /) { | 
|  | 108 | +        m/^  (.+)$/; | 
|  | 109 | +        $current->{emails}->{lc($1)} = 1; | 
|  | 110 | + | 
|  | 111 | +        next; | 
|  | 112 | +    } else { | 
|  | 113 | +        # Found a new person; save the old entry | 
|  | 114 | +        save($current) | 
|  | 115 | +            if (defined($current)); | 
|  | 116 | + | 
|  | 117 | +        $current = undef; | 
|  | 118 | +        $current->{org} = $unknown_org; | 
|  | 119 | +        if ($line =~ m/^(.+?),\s+(.+)$/) { | 
|  | 120 | +            $current->{name} = $1; | 
|  | 121 | +            $current->{org} = $2; | 
| 76 | 122 |         } else { | 
| 77 |  | -            $line =~ m/^$found\s+(.+?)\s{2,}(.+)$/; | 
| 78 |  | -            my $gecos = $1; | 
| 79 |  | -            my $aff = $2; | 
| 80 |  | - | 
| 81 |  | -            if ($gecos =~ /^\s+$/) { | 
| 82 |  | -                $gecos = "<UNKNOWN>"; | 
| 83 |  | -            } else { | 
| 84 |  | -                $committers->{$found}->{gecos} = $gecos; | 
| 85 |  | -            } | 
| 86 |  | -            if ($aff =~ /^\s+$/) { | 
| 87 |  | -                $aff = "<UNKNOWN>"; | 
| 88 |  | -            } else { | 
| 89 |  | -                $committers->{$found}->{affiliation} = $aff; | 
| 90 |  | -            } | 
| 91 |  | -            print "Git email $found matches: $gecos / $aff\n"; | 
|  | 123 | +            $current->{name} = $line; | 
| 92 | 124 |         } | 
| 93 |  | -    } | 
| 94 | 125 | 
 | 
| 95 |  | -    # If we're in the footer, just save all the lines | 
| 96 |  | -    if ($in_footer) { | 
| 97 |  | -        $footer .= "$_\n"; | 
|  | 126 | +        next; | 
| 98 | 127 |     } | 
| 99 | 128 | } | 
|  | 129 | + | 
|  | 130 | +save($current) | 
|  | 131 | +    if (defined($current)); | 
|  | 132 | + | 
| 100 | 133 | close(AUTHORS); | 
| 101 | 134 | 
 | 
| 102 |  | -# Figure out the 3 column widths.  The last line of the header | 
| 103 |  | -# contains -'s for each of the columns. | 
|  | 135 | +###################################################################### | 
| 104 | 136 | 
 | 
| 105 |  | -$header =~ m/\n([\-\s]+?)$/m; | 
| 106 |  | -my $div_line = $1; | 
| 107 |  | -my @divs = split(/ /, $div_line); | 
| 108 |  | -my $id_col = length($divs[0]); | 
| 109 |  | -my $gecos_col = length($divs[1]); | 
| 110 |  | -my $aff_col = length($divs[2]); | 
|  | 137 | +# Output a new AUTHORS file | 
| 111 | 138 | 
 | 
| 112 |  | -# Print out a new AUTHORS file | 
| 113 | 139 | open (AUTHORS, ">AUTHORS.new") || die "Can't write to AUTHORS file"; | 
|  | 140 | + | 
| 114 | 141 | print AUTHORS $header; | 
| 115 |  | -my $i; | 
| 116 |  | -my $have_unknowns = 0; | 
| 117 |  | -foreach my $git_email (sort(keys(%${committers}))) { | 
| 118 |  | -    # Skip the automated accounts | 
| 119 |  | -    next | 
| 120 |  | -        if ($git_email eq "no-author\@open-mpi.org" || | 
| 121 |  | -            $git_email eq "mpiteam\@open-mpi.org"); | 
| 122 |  | - | 
| 123 |  | -    print AUTHORS $git_email; | 
| 124 |  | -    $i = length($git_email); | 
| 125 |  | -    while ($i <= $id_col) { | 
| 126 |  | -        print AUTHORS ' '; | 
| 127 |  | -        ++$i; | 
|  | 142 | + | 
|  | 143 | +my @people_with_unknown_orgs; | 
|  | 144 | +my $email_dups; | 
|  | 145 | + | 
|  | 146 | +my @sorted_people = sort(keys(%{$people})); | 
|  | 147 | +foreach my $p (@sorted_people) { | 
|  | 148 | +    print AUTHORS $p; | 
|  | 149 | +    if (exists($people->{$p}->{org})) { | 
|  | 150 | +        print AUTHORS ", $people->{$p}->{org}"; | 
|  | 151 | + | 
|  | 152 | +        # Record this so that we can warn about it | 
|  | 153 | +        push(@people_with_unknown_orgs, $p) | 
|  | 154 | +            if ($people->{$p}->{org} eq $unknown_org); | 
| 128 | 155 |     } | 
|  | 156 | +    print AUTHORS "\n"; | 
|  | 157 | + | 
|  | 158 | +    foreach my $e (sort(keys(%{$people->{$p}->{emails}}))) { | 
|  | 159 | +        # Sanity check: make sure this email address does not show up | 
|  | 160 | +        # with any other person/name | 
|  | 161 | +        my $dup; | 
|  | 162 | +        foreach my $p2 (@sorted_people) { | 
|  | 163 | +            next | 
|  | 164 | +                if ($p eq $p2); | 
|  | 165 | + | 
|  | 166 | +            foreach my $e2 (keys(%{$people->{$p2}->{emails}})) { | 
|  | 167 | +                if ($e eq $e2) { | 
|  | 168 | +                    $dup = $p2; | 
|  | 169 | + | 
|  | 170 | +                    # Record this so that we can warn about it | 
|  | 171 | +                    if ($p le $p2) { | 
|  | 172 | +                        $email_dups->{$p} = $p2; | 
|  | 173 | +                    } else { | 
|  | 174 | +                        $email_dups->{$p2} = $p; | 
|  | 175 | +                    } | 
|  | 176 | +                    last; | 
|  | 177 | +                } | 
|  | 178 | +            } | 
| 129 | 179 | 
 | 
| 130 |  | -    # if we have gecos/affiliation, print them.  Otherwise, just end | 
| 131 |  | -    # the line here | 
| 132 |  | -    if ((exists($committers->{$git_email}->{gecos}) && | 
| 133 |  | -         $committers->{$git_email}->{gecos} !~ /^\s+$/) || | 
| 134 |  | -        (exists($committers->{$git_email}->{affiliation}) && | 
| 135 |  | -         $committers->{$git_email}->{affiliation} !~ /^\s+$/)) { | 
| 136 |  | -        print AUTHORS $committers->{$git_email}->{gecos}; | 
| 137 |  | -        $i = length($committers->{$git_email}->{gecos}); | 
| 138 |  | -        while ($i <= $gecos_col) { | 
| 139 |  | -            print AUTHORS ' '; | 
| 140 |  | -            ++$i; | 
|  | 180 | +            last | 
|  | 181 | +                if (defined($dup)); | 
| 141 | 182 |         } | 
| 142 | 183 | 
 | 
| 143 |  | -        print AUTHORS $committers->{$git_email}->{affiliation} | 
| 144 |  | -            if (exists($committers->{$git_email}->{affiliation})); | 
| 145 |  | -    } else { | 
| 146 |  | -        $have_unknowns = 1; | 
|  | 184 | +        print AUTHORS "  $e"; | 
|  | 185 | +        print AUTHORS " (**** DUPLICATE EMAIL ADDRESS WITH $dup ***)" | 
|  | 186 | +            if (defined($dup)); | 
|  | 187 | +        print AUTHORS "\n"; | 
| 147 | 188 |     } | 
| 148 |  | -    print AUTHORS "\n"; | 
| 149 | 189 | } | 
| 150 |  | -print AUTHORS $footer; | 
| 151 | 190 | close(AUTHORS); | 
| 152 | 191 | 
 | 
|  | 192 | +# We have a new AUTHORS file!  Replace the old one. | 
| 153 | 193 | unlink("AUTHORS"); | 
| 154 | 194 | rename("AUTHORS.new", "AUTHORS"); | 
| 155 | 195 | 
 | 
| 156 | 196 | print "New AUTHORS file written.\n"; | 
| 157 |  | -if ($have_unknowns) { | 
| 158 |  | -    print "*** WARNING: There were Git committers with unknown real names and/or\n*** affiliations.  You *MUST* edit the AUTHORS file to fill them in!\n"; | 
| 159 |  | -} else { | 
| 160 |  | -    print "All Git emails were matched! No need to hand-edit the AUTHORS file.\n"; | 
|  | 197 | + | 
|  | 198 | +###################################################################### | 
|  | 199 | + | 
|  | 200 | +# Output any relevant warnings | 
|  | 201 | + | 
|  | 202 | +my $warned = 0; | 
|  | 203 | +if ($#people_with_unknown_orgs >= 0) { | 
|  | 204 | +    $warned = 1; | 
|  | 205 | +    print "\n*** WARNING: The following people have unspecified organiations:\n"; | 
|  | 206 | +    foreach my $p (@people_with_unknown_orgs) { | 
|  | 207 | +        print "***   $p\n"; | 
|  | 208 | +    } | 
|  | 209 | +} | 
|  | 210 | + | 
|  | 211 | +my @k = sort(keys(%{$email_dups})); | 
|  | 212 | +if ($#k >= 0) { | 
|  | 213 | +    $warned = 1; | 
|  | 214 | +    print "\n*** WARNING: The following people had the same email address:\n"; | 
|  | 215 | +    foreach my $p (@k) { | 
|  | 216 | +        print "***   $p, $email_dups->{$p}\n"; | 
|  | 217 | +    } | 
|  | 218 | +} | 
|  | 219 | + | 
|  | 220 | +if ($warned) { | 
|  | 221 | +    print " | 
|  | 222 | +******************************************************************************* | 
|  | 223 | +*** YOU SHOULD EDIT THE .mailmap AND/OR AUTHORS FILE TO RESOLVE THESE WARNINGS! | 
|  | 224 | +*******************************************************************************\n"; | 
| 161 | 225 | } | 
| 162 | 226 | 
 | 
|  | 227 | +exit($warned); | 
0 commit comments