Skip to content

Commit cfb0491

Browse files
bk2204 authored and gitster committed
gitweb: make hash size independent
Gitweb has several hard-coded 40 values throughout it to check for values that are passed in or acquired from Git. To simplify the code, introduce a regex variable that matches either exactly 40 or exactly 64 hex characters, and use this variable anywhere we would have previously hard-coded a 40 in a regex. Add some helper functions which allow us to write tighter regexes that match exactly the number of hex characters we're expecting. Similarly, switch the code that looks for deleted diffinfo information to look for either 40 or 64 zeros, and update one piece of code to use this function. Finally, when formatting a log line, allow an abbreviated describe output to contain up to 64 characters. Helped-by: Ævar Arnfjörð Bjarmason <[email protected]> Signed-off-by: brian m. carlson <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent bcbb44b commit cfb0491

File tree

1 file changed

+67
-30
lines changed

1 file changed

+67
-30
lines changed

gitweb/gitweb.perl

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,38 @@ sub check_loadavg {
788788
# ======================================================================
789789
# input validation and dispatch
790790

791+
# Object ID lengths, counted in hex digits.  A SHA-256 ID is expressed as a
# SHA-1-sized part plus the extra digits that follow it.
my ($sha1_len, $sha256_extra_len) = (40, 24);
my $sha256_len = $sha256_extra_len + $sha1_len;
795+
796+
# Build a regex matching a run of hex digits whose length is given by $len.
#
# $len is interpolated into a regex quantifier, so it has to be either an
# exact count (e.g. 40) or a range (e.g. "7,64"; the upper bound may be left
# out, as in "7,").  Any other value, including undef, makes this function
# die instead of returning a pattern that does not mean what the caller
# intended.
#
# The returned regex is not anchored and has no capturing groups, so it can
# be embedded in a larger pattern.
sub oid_nlen_regex {
	my ($len) = @_;

	die "oid_nlen_regex: invalid length specification\n"
		unless defined $len && $len =~ m/\A[0-9]+(?:,[0-9]*)?\z/;

	my $hchr = qr/[0-9a-fA-F]/;
	return qr/(?:(?:$hchr){$len})/;
}
802+
803+
# Build a regex matching, at the start of a string, the literal $prefix, an
# object ID of exactly $nlen hex digits, the literal $infix, and a second
# object ID of exactly $nlen hex digits.
#
# $prefix and $infix are quoted, so regex metacharacters in them (such as the
# dots of "..") are matched literally.
#
# The second ID must not be followed by another hex digit, which keeps the
# length check exact.  The string may still continue after it: a combined
# diff header reads "index <hash>,<hash>..<hash>", and an ordinary one can
# carry a trailing file mode.  Anchoring the end of the string here would
# reject both.
sub oid_nlen_prefix_infix_regex {
	my ($nlen, $prefix, $infix) = @_;

	my $rx = oid_nlen_regex($nlen);

	return qr/^\Q$prefix\E$rx\Q$infix\E$rx(?![0-9a-fA-F])/;
}
814+
815+
# Matches one complete object ID: $sha1_len hex digits, optionally followed
# by the $sha256_extra_len further hex digits of a SHA-256 ID.  The pattern
# is not anchored; users add ^ and $ where they need them.
our $oid_regex = do {
	my $sha1_digits = oid_nlen_regex($sha1_len);
	my $sha256_tail = oid_nlen_regex($sha256_extra_len);
	qr/(?:$sha1_digits(?:$sha256_tail)?)/;
};
822+
791823
# input parameters can be collected from a variety of sources (presently, CGI
792824
# and PATH_INFO), so we define an %input_params hash that collects them all
793825
# together during validation: this allows subsequent uses (e.g. href()) to be
@@ -1516,7 +1548,7 @@ sub is_valid_refname {
15161548

15171549
return undef unless defined $input;
15181550
# textual hashes are O.K.
1519-
if ($input =~ m/^[0-9a-fA-F]{40}$/) {
1551+
if ($input =~ m/^$oid_regex$/) {
15201552
return 1;
15211553
}
15221554
# it must be correct pathname
@@ -2028,6 +2060,9 @@ sub file_type_long {
20282060
sub format_log_line_html {
20292061
my $line = shift;
20302062

2063+
# Potentially abbreviated OID.
2064+
my $regex = oid_nlen_regex("7,64");
2065+
20312066
$line = esc_html($line, -nbsp=>1);
20322067
$line =~ s{
20332068
\b
@@ -2037,10 +2072,10 @@ sub format_log_line_html {
20372072
(?<!-) # see strbuf_check_tag_ref(). Tags can't start with -
20382073
[A-Za-z0-9.-]+
20392074
(?!\.) # refs can't end with ".", see check_refname_format()
2040-
-g[0-9a-fA-F]{7,40}
2075+
-g$regex
20412076
|
20422077
# Just a normal looking Git object ID (SHA-1 or SHA-256), possibly abbreviated
2043-
[0-9a-fA-F]{7,40}
2078+
$regex
20442079
)
20452080
\b
20462081
}{
@@ -2286,7 +2321,8 @@ sub format_extended_diff_header_line {
22862321
')</span>';
22872322
}
22882323
# match <hash>
2289-
if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) {
2324+
if ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", ",") |
2325+
$line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", ",")) {
22902326
# can match only for combined diff
22912327
$line = 'index ';
22922328
for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
@@ -2308,7 +2344,8 @@ sub format_extended_diff_header_line {
23082344
$line .= '0' x 7;
23092345
}
23102346

2311-
} elsif ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) {
2347+
} elsif ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", "..") |
2348+
$line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", "..")) {
23122349
# can match only for ordinary diff
23132350
my ($from_link, $to_link);
23142351
if ($from->{'href'}) {
@@ -2834,7 +2871,7 @@ sub git_get_hash_by_path {
28342871
}
28352872

28362873
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
2837-
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/;
2874+
$line =~ m/^([0-9]+) (.+) ($oid_regex)\t/;
28382875
if (defined $type && $type ne $2) {
28392876
# type doesn't match
28402877
return undef;
@@ -3333,7 +3370,7 @@ sub git_get_references {
33333370

33343371
while (my $line = <$fd>) {
33353372
chomp $line;
3336-
if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!) {
3373+
if ($line =~ m!^($oid_regex)\srefs/($type.*)$!) {
33373374
if (defined $refs{$1}) {
33383375
push @{$refs{$1}}, $2;
33393376
} else {
@@ -3407,7 +3444,7 @@ sub parse_tag {
34073444
$tag{'id'} = $tag_id;
34083445
while (my $line = <$fd>) {
34093446
chomp $line;
3410-
if ($line =~ m/^object ([0-9a-fA-F]{40})$/) {
3447+
if ($line =~ m/^object ($oid_regex)$/) {
34113448
$tag{'object'} = $1;
34123449
} elsif ($line =~ m/^type (.+)$/) {
34133450
$tag{'type'} = $1;
@@ -3451,15 +3488,15 @@ sub parse_commit_text {
34513488
}
34523489

34533490
my $header = shift @commit_lines;
3454-
if ($header !~ m/^[0-9a-fA-F]{40}/) {
3491+
if ($header !~ m/^$oid_regex/) {
34553492
return;
34563493
}
34573494
($co{'id'}, my @parents) = split ' ', $header;
34583495
while (my $line = shift @commit_lines) {
34593496
last if $line eq "\n";
3460-
if ($line =~ m/^tree ([0-9a-fA-F]{40})$/) {
3497+
if ($line =~ m/^tree ($oid_regex)$/) {
34613498
$co{'tree'} = $1;
3462-
} elsif ((!defined $withparents) && ($line =~ m/^parent ([0-9a-fA-F]{40})$/)) {
3499+
} elsif ((!defined $withparents) && ($line =~ m/^parent ($oid_regex)$/)) {
34633500
push @parents, $1;
34643501
} elsif ($line =~ m/^author (.*) ([0-9]+) (.*)$/) {
34653502
$co{'author'} = to_utf8($1);
@@ -3591,7 +3628,7 @@ sub parse_difftree_raw_line {
35913628

35923629
# ':100644 100644 03b218260e99b78c6df0ed378e59ed9205ccc96d 3b93d5e7cc7f7dd4ebed13a5cc1a4ad976fc94d8 M ls-files.c'
35933630
# ':100644 100644 7f9281985086971d3877aca27704f2aaf9c448ce bc190ebc71bbd923f2b728e505408f5e54bd073a M rev-tree.c'
3594-
if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/) {
3631+
if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ($oid_regex) ($oid_regex) (.)([0-9]{0,3})\t(.*)$/) {
35953632
$res{'from_mode'} = $1;
35963633
$res{'to_mode'} = $2;
35973634
$res{'from_id'} = $3;
@@ -3606,7 +3643,7 @@ sub parse_difftree_raw_line {
36063643
}
36073644
# '::100755 100755 100755 60e79ca1b01bc8b057abe17ddab484699a7f5fdb 94067cc5f73388f33722d52ae02f44692bc07490 94067cc5f73388f33722d52ae02f44692bc07490 MR git-gui/git-gui.sh'
36083645
# combined diff (for merge commit)
3609-
elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:[0-9a-fA-F]{40} )+)([a-zA-Z]+)\t(.*)$//) {
3646+
elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:$oid_regex )+)([a-zA-Z]+)\t(.*)$//) {
36103647
$res{'nparents'} = length($1);
36113648
$res{'from_mode'} = [ split(' ', $2) ];
36123649
$res{'to_mode'} = pop @{$res{'from_mode'}};
@@ -3616,7 +3653,7 @@ sub parse_difftree_raw_line {
36163653
$res{'to_file'} = unquote($5);
36173654
}
36183655
# 'c512b523472485aef4fff9e57b229d9d243c967f'
3619-
elsif ($line =~ m/^([0-9a-fA-F]{40})$/) {
3656+
elsif ($line =~ m/^($oid_regex)$/) {
36203657
$res{'commit'} = $1;
36213658
}
36223659

@@ -3644,7 +3681,7 @@ sub parse_ls_tree_line {
36443681

36453682
if ($opts{'-l'}) {
36463683
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa 16717 panic.c'
3647-
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40}) +(-|[0-9]+)\t(.+)$/s;
3684+
$line =~ m/^([0-9]+) (.+) ($oid_regex) +(-|[0-9]+)\t(.+)$/s;
36483685

36493686
$res{'mode'} = $1;
36503687
$res{'type'} = $2;
@@ -3657,7 +3694,7 @@ sub parse_ls_tree_line {
36573694
}
36583695
} else {
36593696
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
3660-
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t(.+)$/s;
3697+
$line =~ m/^([0-9]+) (.+) ($oid_regex)\t(.+)$/s;
36613698

36623699
$res{'mode'} = $1;
36633700
$res{'type'} = $2;
@@ -4799,7 +4836,7 @@ sub fill_from_file_info {
47994836
# Tell whether a parsed diff entry describes a deleted file.
#
# $diffinfo is a hash reference; its 'to_id' value is compared against the
# all-zero object ID in both supported lengths (40 and 64 hex digits).
# Returns a true value on a match and a false value otherwise.
sub is_deleted {
	my ($diffinfo) = @_;

	my $to_id = $diffinfo->{'to_id'};
	my $null_sha1 = '0' x 40;
	my $null_sha256 = '0' x 64;

	return 1 if $to_id eq $null_sha1;
	return $to_id eq $null_sha256;
}
48044841

48054842
# does patch correspond to [previous] difftree raw line
@@ -6285,7 +6322,7 @@ sub git_search_changes {
62856322
-class => "list subject"},
62866323
chop_and_escape_str($co{'title'}, 50) . "<br/>");
62876324
} elsif (defined $set{'to_id'}) {
6288-
next if ($set{'to_id'} =~ m/^0{40}$/);
6325+
next if is_deleted(\%set);
62896326

62906327
print $cgi->a({-href => href(action=>"blob", hash_base=>$co{'id'},
62916328
hash=>$set{'to_id'}, file_name=>$set{'to_file'}),
@@ -6829,7 +6866,7 @@ sub git_blame_common {
68296866
# the header: <SHA-1> <src lineno> <dst lineno> [<lines in group>]
68306867
# no <lines in group> for subsequent lines in group of lines
68316868
my ($full_rev, $orig_lineno, $lineno, $group_size) =
6832-
($line =~ /^([0-9a-f]{40}) (\d+) (\d+)(?: (\d+))?$/);
6869+
($line =~ /^($oid_regex) (\d+) (\d+)(?: (\d+))?$/);
68336870
if (!exists $metainfo{$full_rev}) {
68346871
$metainfo{$full_rev} = { 'nprevious' => 0 };
68356872
}
@@ -6879,7 +6916,7 @@ sub git_blame_common {
68796916
}
68806917
# 'previous' <sha1 of parent commit> <filename at commit>
68816918
if (exists $meta->{'previous'} &&
6882-
$meta->{'previous'} =~ /^([a-fA-F0-9]{40}) (.*)$/) {
6919+
$meta->{'previous'} =~ /^($oid_regex) (.*)$/) {
68836920
$meta->{'parent'} = $1;
68846921
$meta->{'file_parent'} = unquote($2);
68856922
}
@@ -6996,7 +7033,7 @@ sub git_blob_plain {
69967033
} else {
69977034
die_error(400, "No file name defined");
69987035
}
6999-
} elsif ($hash =~ m/^[0-9a-fA-F]{40}$/) {
7036+
} elsif ($hash =~ m/^$oid_regex$/) {
70007037
# blobs defined by non-textual hash id's can be cached
70017038
$expires = "+1d";
70027039
}
@@ -7057,7 +7094,7 @@ sub git_blob {
70577094
} else {
70587095
die_error(400, "No file name defined");
70597096
}
7060-
} elsif ($hash =~ m/^[0-9a-fA-F]{40}$/) {
7097+
} elsif ($hash =~ m/^$oid_regex$/) {
70617098
# blobs defined by non-textual hash id's can be cached
70627099
$expires = "+1d";
70637100
}
@@ -7515,7 +7552,7 @@ sub git_commit {
75157552

75167553
# non-textual hash id's can be cached
75177554
my $expires;
7518-
if ($hash =~ m/^[0-9a-fA-F]{40}$/) {
7555+
if ($hash =~ m/^$oid_regex$/) {
75197556
$expires = "+1d";
75207557
}
75217558
my $refs = git_get_references();
@@ -7609,7 +7646,7 @@ sub git_object {
76097646
close $fd;
76107647

76117648
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
7612-
unless ($line && $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/) {
7649+
unless ($line && $line =~ m/^([0-9]+) (.+) ($oid_regex)\t/) {
76137650
die_error(404, "File or directory for given base does not exist");
76147651
}
76157652
$type = $2;
@@ -7649,7 +7686,7 @@ sub git_blobdiff {
76497686
or die_error(404, "Blob diff not found");
76507687

76517688
} elsif (defined $hash &&
7652-
$hash =~ /[0-9a-fA-F]{40}/) {
7689+
$hash =~ $oid_regex) {
76537690
# try to find filename from $hash
76547691

76557692
# read filtered raw output
@@ -7659,7 +7696,7 @@ sub git_blobdiff {
76597696
@difftree =
76607697
# ':100644 100644 03b21826... 3b93d5e7... M ls-files.c'
76617698
# $hash == to_id
7662-
grep { /^:[0-7]{6} [0-7]{6} [0-9a-fA-F]{40} $hash/ }
7699+
grep { /^:[0-7]{6} [0-7]{6} $oid_regex $hash/ }
76637700
map { chomp; $_ } <$fd>;
76647701
close $fd
76657702
or die_error(404, "Reading git-diff-tree failed");
@@ -7682,8 +7719,8 @@ sub git_blobdiff {
76827719
$hash ||= $diffinfo{'to_id'};
76837720

76847721
# non-textual hash id's can be cached
7685-
if ($hash_base =~ m/^[0-9a-fA-F]{40}$/ &&
7686-
$hash_parent_base =~ m/^[0-9a-fA-F]{40}$/) {
7722+
if ($hash_base =~ m/^$oid_regex$/ &&
7723+
$hash_parent_base =~ m/^$oid_regex$/) {
76877724
$expires = '+1d';
76887725
}
76897726

@@ -7819,7 +7856,7 @@ sub git_commitdiff {
78197856
$hash_parent ne '-c' && $hash_parent ne '--cc') {
78207857
# commitdiff with two commits given
78217858
my $hash_parent_short = $hash_parent;
7822-
if ($hash_parent =~ m/^[0-9a-fA-F]{40}$/) {
7859+
if ($hash_parent =~ m/^$oid_regex$/) {
78237860
$hash_parent_short = substr($hash_parent, 0, 7);
78247861
}
78257862
$formats_nav .=
@@ -7928,7 +7965,7 @@ sub git_commitdiff {
79287965

79297966
# non-textual hash id's can be cached
79307967
my $expires;
7931-
if ($hash =~ m/^[0-9a-fA-F]{40}$/) {
7968+
if ($hash =~ m/^$oid_regex$/) {
79327969
$expires = "+1d";
79337970
}
79347971

0 commit comments

Comments
 (0)