Skip to content

Commit 0996dd3

Browse files
Georgios Kontaxis and gitster
authored and committed
gitweb: add "e-mail privacy" feature to redact e-mail addresses
Gitweb extracts content from the Git log and makes it accessible over HTTP. As a result, e-mail addresses found in commits are exposed to web crawlers and they may not respect robots.txt. This can result in unsolicited messages. Introduce an 'email-privacy' feature which redacts e-mail addresses from the generated HTML content. Specifically, obscure addresses retrieved from the author/committer and comment sections of the Git log. The feature is off by default. This feature does not prevent someone from downloading the unredacted commit log, e.g., by cloning the repository, and extracting information from it. It aims to hinder the low-effort, bulk collection of e-mail addresses by web crawlers. Signed-off-by: Georgios Kontaxis <[email protected]> Acked-by: Eric Wong <[email protected]> Acked-by: Ævar Arnfjörð Bjarmason <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 84d06cd commit 0996dd3

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

Documentation/gitweb.conf.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,17 @@ default font sizes or lineheights are changed (e.g. via adding extra
751751
CSS stylesheet in `@stylesheets`), it may be appropriate to change
752752
these values.
753753

754+
email-privacy::
755+
Redact e-mail addresses from the generated HTML, etc. content.
756+
This obscures e-mail addresses retrieved from the author/committer
757+
and comment sections of the Git log.
758+
It is meant to hinder web crawlers that harvest and abuse addresses.
759+
Such crawlers may not respect robots.txt.
760+
Note that users and user tools also see the addresses as redacted.
761+
If Gitweb is not the final step in a workflow then subsequent steps
762+
may misbehave because of the redacted information they receive.
763+
Disabled by default.
764+
754765
highlight::
755766
Server-side syntax highlight support in "blob" view. It requires
756767
`$highlight_bin` program to be available (see the description of

gitweb/gitweb.perl

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,15 @@ sub evaluate_uri {
569569
'sub' => \&feature_extra_branch_refs,
570570
'override' => 0,
571571
'default' => []},
572+
573+
# Redact e-mail addresses.
574+
575+
# To enable system wide have in $GITWEB_CONFIG
576+
# $feature{'email-privacy'}{'default'} = [1];
577+
'email-privacy' => {
578+
'sub' => sub { feature_bool('email-privacy', @_) },
579+
'override' => 1,
580+
'default' => [0]},
572581
);
573582

574583
sub gitweb_get_feature {
@@ -3449,6 +3458,13 @@ sub parse_date {
34493458
return %date;
34503459
}
34513460

3461+
# Redact e-mail addresses in a single line of text when the
# 'email-privacy' feature is enabled.
#
# Takes one string and returns it.  If gitweb_check_feature('email-privacy')
# is false the string is returned untouched.  Otherwise every span of the
# form "<...@...>" (an angle-bracketed run containing an '@' with at least
# one character on each side of it) is replaced by the literal "<redacted>".
# Addresses that are not wrapped in angle brackets are left as they are.
sub hide_mailaddrs_if_private {
	my ($text) = @_;

	if (gitweb_check_feature('email-privacy')) {
		# Works on the local copy only; the caller's variable is not modified.
		$text =~ s/<[^@>]+@[^>]+>/<redacted>/g;
	}

	return $text;
}
3467+
34523468
sub parse_tag {
34533469
my $tag_id = shift;
34543470
my %tag;
@@ -3465,7 +3481,7 @@ sub parse_tag {
34653481
} elsif ($line =~ m/^tag (.+)$/) {
34663482
$tag{'name'} = $1;
34673483
} elsif ($line =~ m/^tagger (.*) ([0-9]+) (.*)$/) {
3468-
$tag{'author'} = $1;
3484+
$tag{'author'} = hide_mailaddrs_if_private($1);
34693485
$tag{'author_epoch'} = $2;
34703486
$tag{'author_tz'} = $3;
34713487
if ($tag{'author'} =~ m/^([^<]+) <([^>]*)>/) {
@@ -3513,7 +3529,7 @@ sub parse_commit_text {
35133529
} elsif ((!defined $withparents) && ($line =~ m/^parent ($oid_regex)$/)) {
35143530
push @parents, $1;
35153531
} elsif ($line =~ m/^author (.*) ([0-9]+) (.*)$/) {
3516-
$co{'author'} = to_utf8($1);
3532+
$co{'author'} = hide_mailaddrs_if_private(to_utf8($1));
35173533
$co{'author_epoch'} = $2;
35183534
$co{'author_tz'} = $3;
35193535
if ($co{'author'} =~ m/^([^<]+) <([^>]*)>/) {
@@ -3523,7 +3539,7 @@ sub parse_commit_text {
35233539
$co{'author_name'} = $co{'author'};
35243540
}
35253541
} elsif ($line =~ m/^committer (.*) ([0-9]+) (.*)$/) {
3526-
$co{'committer'} = to_utf8($1);
3542+
$co{'committer'} = hide_mailaddrs_if_private(to_utf8($1));
35273543
$co{'committer_epoch'} = $2;
35283544
$co{'committer_tz'} = $3;
35293545
if ($co{'committer'} =~ m/^([^<]+) <([^>]*)>/) {
@@ -3568,9 +3584,10 @@ sub parse_commit_text {
35683584
if (! defined $co{'title'} || $co{'title'} eq "") {
35693585
$co{'title'} = $co{'title_short'} = '(no commit message)';
35703586
}
3571-
# remove added spaces
3587+
# remove added spaces, redact e-mail addresses if applicable.
35723588
foreach my $line (@commit_lines) {
35733589
$line =~ s/^ //;
3590+
$line = hide_mailaddrs_if_private($line);
35743591
}
35753592
$co{'comment'} = \@commit_lines;
35763593

@@ -7489,7 +7506,8 @@ sub git_log_generic {
74897506
-accesskey => "n", -title => "Alt-n"}, "next");
74907507
}
74917508
my $patch_max = gitweb_get_feature('patches');
7492-
if ($patch_max && !defined $file_name) {
7509+
if ($patch_max && !defined $file_name &&
7510+
!gitweb_check_feature('email-privacy')) {
74937511
if ($patch_max < 0 || @commitlist <= $patch_max) {
74947512
$paging_nav .= " &sdot; " .
74957513
$cgi->a({-href => href(action=>"patches", -replay=>1)},
@@ -7550,7 +7568,8 @@ sub git_commit {
75507568
} @$parents ) .
75517569
')';
75527570
}
7553-
if (gitweb_check_feature('patches') && @$parents <= 1) {
7571+
if (gitweb_check_feature('patches') && @$parents <= 1 &&
7572+
!gitweb_check_feature('email-privacy')) {
75547573
$formats_nav .= " | " .
75557574
$cgi->a({-href => href(action=>"patch", -replay=>1)},
75567575
"patch");
@@ -7863,7 +7882,8 @@ sub git_commitdiff {
78637882
$formats_nav =
78647883
$cgi->a({-href => href(action=>"commitdiff_plain", -replay=>1)},
78657884
"raw");
7866-
if ($patch_max && @{$co{'parents'}} <= 1) {
7885+
if ($patch_max && @{$co{'parents'}} <= 1 &&
7886+
!gitweb_check_feature('email-privacy')) {
78677887
$formats_nav .= " | " .
78687888
$cgi->a({-href => href(action=>"patch", -replay=>1)},
78697889
"patch");

0 commit comments

Comments
 (0)