Skip to content

Commit 6fe12b5

Browse files
committed
Merge branch 'jk/rev-list-disk-usage'
"git rev-list" command learned "--disk-usage" option.

* jk/rev-list-disk-usage:
  docs/rev-list: add some examples of --disk-usage
  docs/rev-list: add an examples section
  rev-list: add --disk-usage option for calculating disk usage
  t: add --no-tag option to test_commit
2 parents 966e671 + a1db097 commit 6fe12b5

File tree

8 files changed

+292
-8
lines changed

8 files changed

+292
-8
lines changed

Documentation/git-rev-list.txt

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,99 @@ include::rev-list-options.txt[]
3131

3232
include::pretty-formats.txt[]
3333

34+
EXAMPLES
35+
--------
36+
37+
* Print the list of commits reachable from the current branch.
38+
+
39+
----------
40+
git rev-list HEAD
41+
----------
42+
43+
* Print the list of commits on this branch, but not present in the
44+
upstream branch.
45+
+
46+
----------
47+
git rev-list @{upstream}..HEAD
48+
----------
49+
50+
* Format commits with their author and commit message (see also the
51+
porcelain linkgit:git-log[1]).
52+
+
53+
----------
54+
git rev-list --format=medium HEAD
55+
----------
56+
57+
* Format commits along with their diffs (see also the porcelain
58+
linkgit:git-log[1], which can do this in a single process).
59+
+
60+
----------
61+
git rev-list HEAD |
62+
git diff-tree --stdin --format=medium -p
63+
----------
64+
65+
* Print the list of commits on the current branch that touched any
66+
file in the `Documentation` directory.
67+
+
68+
----------
69+
git rev-list HEAD -- Documentation/
70+
----------
71+
72+
* Print the list of commits authored by you in the past year, on
73+
any branch, tag, or other ref.
74+
+
75+
----------
76+
git rev-list --author=you@example.com --since=1.year.ago --all
77+
----------
78+
79+
* Print the list of objects reachable from the current branch (i.e., all
80+
commits and the blobs and trees they contain).
81+
+
82+
----------
83+
git rev-list --objects HEAD
84+
----------
85+
86+
* Compare the disk size of all reachable objects, versus those
87+
reachable from reflogs, versus the total packed size. This can tell
88+
you whether running `git repack -ad` might reduce the repository size
89+
(by dropping unreachable objects), and whether expiring reflogs might
90+
help.
91+
+
92+
----------
93+
# reachable objects
94+
git rev-list --disk-usage --objects --all
95+
# plus reflogs
96+
git rev-list --disk-usage --objects --all --reflog
97+
# total disk size used
98+
du -c .git/objects/pack/*.pack .git/objects/??/*
99+
# alternative to du: add up "size" and "size-pack" fields
100+
git count-objects -v
101+
----------
102+
103+
* Report the disk size of each branch, not including objects used by the
104+
current branch. This can find outliers that are contributing to a
105+
bloated repository size (e.g., because somebody accidentally committed
106+
large build artifacts).
107+
+
108+
----------
109+
git for-each-ref --format='%(refname)' |
110+
while read branch
111+
do
112+
size=$(git rev-list --disk-usage --objects HEAD..$branch)
113+
echo "$size $branch"
114+
done |
115+
sort -n
116+
----------
117+
118+
* Compare the on-disk size of branches in one group of refs, excluding
119+
another. If you co-mingle objects from multiple remotes in a single
120+
repository, this can show which remotes are contributing to the
121+
repository size (taking the size of `origin` as a baseline).
122+
+
123+
----------
124+
git rev-list --disk-usage --objects --remotes=$suspect --not --remotes=origin
125+
----------
126+
34127
GIT
35128
---
36129
Part of the linkgit:git[1] suite

Documentation/rev-list-options.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,15 @@ ifdef::git-rev-list[]
227227
test the exit status to see if a range of objects is fully
228228
connected (or not). It is faster than redirecting stdout
229229
to `/dev/null` as the output does not have to be formatted.
230+
231+
--disk-usage::
232+
Suppress normal output; instead, print the sum of the bytes used
233+
for on-disk storage by the selected commits or objects. This is
234+
equivalent to piping the output into `git cat-file
235+
--batch-check='%(objectsize:disk)'`, except that it runs much
236+
faster (especially with `--use-bitmap-index`). See the `CAVEATS`
237+
section in linkgit:git-cat-file[1] for the limitations of what
238+
"on-disk storage" means.
230239
endif::git-rev-list[]
231240

232241
--cherry-mark::

builtin/rev-list.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ static int arg_show_object_names = 1;
8080

8181
#define DEFAULT_OIDSET_SIZE (16*1024)
8282

83+
static int show_disk_usage;
84+
static off_t total_disk_usage;
85+
86+
/*
 * Look up the on-disk size of `obj` by asking oid_object_info_extended()
 * to fill in `disk_sizep`. Dies with an error naming the object if the
 * lookup fails; otherwise returns the size that was filled in.
 */
static off_t get_object_disk_usage(struct object *obj)
87+
{
88+
off_t size;
89+
struct object_info oi = OBJECT_INFO_INIT;
90+
oi.disk_sizep = &size;
91+
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
92+
die(_("unable to get disk usage of %s"), oid_to_hex(&obj->oid));
93+
return size;
94+
}
95+
8396
static void finish_commit(struct commit *commit);
8497
static void show_commit(struct commit *commit, void *data)
8598
{
@@ -88,6 +101,9 @@ static void show_commit(struct commit *commit, void *data)
88101

89102
display_progress(progress, ++progress_counter);
90103

104+
if (show_disk_usage)
105+
total_disk_usage += get_object_disk_usage(&commit->object);
106+
91107
if (info->flags & REV_LIST_QUIET) {
92108
finish_commit(commit);
93109
return;
@@ -258,6 +274,8 @@ static void show_object(struct object *obj, const char *name, void *cb_data)
258274
if (finish_object(obj, name, cb_data))
259275
return;
260276
display_progress(progress, ++progress_counter);
277+
if (show_disk_usage)
278+
total_disk_usage += get_object_disk_usage(obj);
261279
if (info->flags & REV_LIST_QUIET)
262280
return;
263281

@@ -452,6 +470,23 @@ static int try_bitmap_traversal(struct rev_info *revs,
452470
return 0;
453471
}
454472

473+
/*
 * Try to answer a --disk-usage request from the bitmap index alone.
 *
 * Returns -1 without printing anything when show_disk_usage is unset or
 * when prepare_bitmap_walk() yields no bitmap. Otherwise prints the total
 * reported by get_disk_usage_from_bitmap() on a line of its own and
 * returns 0.
 */
static int try_bitmap_disk_usage(struct rev_info *revs,
474+
struct list_objects_filter_options *filter)
475+
{
476+
struct bitmap_index *bitmap_git;
477+
478+
if (!show_disk_usage)
479+
return -1;
480+
481+
bitmap_git = prepare_bitmap_walk(revs, filter);
482+
if (!bitmap_git)
483+
return -1;
484+
485+
printf("%"PRIuMAX"\n",
486+
(uintmax_t)get_disk_usage_from_bitmap(bitmap_git, revs));
487+
return 0;
488+
}
489+
455490
int cmd_rev_list(int argc, const char **argv, const char *prefix)
456491
{
457492
struct rev_info revs;
@@ -584,6 +619,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
584619
continue;
585620
}
586621

622+
if (!strcmp(arg, "--disk-usage")) {
623+
show_disk_usage = 1;
624+
info.flags |= REV_LIST_QUIET;
625+
continue;
626+
}
627+
587628
usage(rev_list_usage);
588629

589630
}
@@ -626,6 +667,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
626667
if (use_bitmap_index) {
627668
if (!try_bitmap_count(&revs, &filter_options))
628669
return 0;
670+
if (!try_bitmap_disk_usage(&revs, &filter_options))
671+
return 0;
629672
if (!try_bitmap_traversal(&revs, &filter_options))
630673
return 0;
631674
}
@@ -690,5 +733,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
690733
printf("%d\n", revs.count_left + revs.count_right);
691734
}
692735

736+
if (show_disk_usage)
737+
printf("%"PRIuMAX"\n", (uintmax_t)total_disk_usage);
738+
693739
return 0;
694740
}

pack-bitmap.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,3 +1430,84 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git,
14301430
return bitmap_git &&
14311431
bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid);
14321432
}
1433+
1434+
/*
 * Sum the on-disk size of every object of type `object_type` that is set
 * in bitmap_git->result.
 *
 * Each word of the result bitmap is ANDed with the matching word produced
 * by the type iterator; words with no bits left are skipped. Within a
 * word, ewah_bit_ctz64() is used to advance `offset` to the next set bit.
 * The size charged for the object at position `pos` is the difference
 * between pack_pos_to_offset() for `pos + 1` and for `pos`.
 */
static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git,
1435+
enum object_type object_type)
1436+
{
1437+
struct bitmap *result = bitmap_git->result;
1438+
struct packed_git *pack = bitmap_git->pack;
1439+
off_t total = 0;
1440+
struct ewah_iterator it;
1441+
eword_t filter;
1442+
size_t i;
1443+
1444+
init_type_iterator(&it, bitmap_git, object_type);
1445+
for (i = 0; i < result->word_alloc &&
1446+
ewah_iterator_next(&filter, &it); i++) {
1447+
eword_t word = result->words[i] & filter;
1448+
size_t base = (i * BITS_IN_EWORD);
1449+
unsigned offset;
1450+
1451+
if (!word)
1452+
continue;
1453+
1454+
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
1455+
size_t pos;
1456+
1457+
if ((word >> offset) == 0)
1458+
break;
1459+
1460+
offset += ewah_bit_ctz64(word >> offset);
1461+
pos = base + offset;
1462+
total += pack_pos_to_offset(pack, pos + 1) -
1463+
pack_pos_to_offset(pack, pos);
1464+
}
1465+
}
1466+
1467+
return total;
1468+
}
1469+
1470+
/*
 * Sum the on-disk size of the objects in the bitmap's extended index
 * (bitmap_git->ext_index) that are set in bitmap_git->result.
 *
 * Entry `i` of the extended index is tested in the result bitmap at bit
 * `pack->num_objects + i`. Sizes are obtained per object through
 * oid_object_info_extended(); a failed lookup is fatal.
 */
static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
1471+
{
1472+
struct bitmap *result = bitmap_git->result;
1473+
struct packed_git *pack = bitmap_git->pack;
1474+
struct eindex *eindex = &bitmap_git->ext_index;
1475+
off_t total = 0;
1476+
struct object_info oi = OBJECT_INFO_INIT;
1477+
off_t object_size;
1478+
size_t i;
1479+
1480+
oi.disk_sizep = &object_size;
1481+
1482+
for (i = 0; i < eindex->count; i++) {
1483+
struct object *obj = eindex->objects[i];
1484+
1485+
if (!bitmap_get(result, pack->num_objects + i))
1486+
continue;
1487+
1488+
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
1489+
die(_("unable to get disk usage of %s"),
1490+
oid_to_hex(&obj->oid));
1491+
1492+
total += object_size;
1493+
}
1494+
return total;
1495+
}
1496+
1497+
/*
 * Return the total on-disk size of the objects selected by the bitmap
 * result: commits are always counted, while trees, blobs and tags are
 * counted only when the matching revs->tree_objects, revs->blob_objects
 * or revs->tag_objects flag is set. The contribution of the extended
 * index is always added on top.
 */
off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git,
1498+
struct rev_info *revs)
1499+
{
1500+
off_t total = 0;
1501+
1502+
total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT);
1503+
if (revs->tree_objects)
1504+
total += get_disk_usage_for_type(bitmap_git, OBJ_TREE);
1505+
if (revs->blob_objects)
1506+
total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB);
1507+
if (revs->tag_objects)
1508+
total += get_disk_usage_for_type(bitmap_git, OBJ_TAG);
1509+
1510+
total += get_disk_usage_for_extended(bitmap_git);
1511+
1512+
return total;
1513+
}

pack-bitmap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ int bitmap_walk_contains(struct bitmap_index *,
6868
*/
6969
int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_id *oid);
7070

71+
off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
72+
7173
void bitmap_writer_show_progress(int show);
7274
void bitmap_writer_set_checksum(unsigned char *sha1);
7375
void bitmap_writer_build_type_index(struct packing_data *to_pack,

t/t4208-log-magic-pathspec.sh

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,8 @@ test_expect_success '"git log :/a -- " should not be ambiguous' '
3131
test_expect_success '"git log :/detached -- " should find a commit only in HEAD' '
3232
test_when_finished "git checkout main" &&
3333
git checkout --detach &&
34-
# Must manually call `test_tick` instead of using `test_commit`,
35-
# because the latter additionally creates a tag, which would make
36-
# the commit reachable not only via HEAD.
37-
test_tick &&
38-
git commit --allow-empty -m detached &&
39-
test_tick &&
40-
git commit --allow-empty -m something-else &&
34+
test_commit --no-tag detached &&
35+
test_commit --no-tag something-else &&
4136
git log :/detached --
4237
'
4338

t/t6115-rev-list-du.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/sh
2+
3+
test_description='basic tests of rev-list --disk-usage'
4+
. ./test-lib.sh
5+
6+
# we want a mix of reachable and unreachable, as well as
7+
# objects in the bitmapped pack and some outside of it
8+
test_expect_success 'set up repository' '
9+
test_commit --no-tag one &&
10+
test_commit --no-tag two &&
11+
git repack -adb &&
12+
git reset --hard HEAD^ &&
13+
test_commit --no-tag three &&
14+
test_commit --no-tag four &&
15+
git reset --hard HEAD^
16+
'
17+
18+
# We don't want to hardcode sizes, because they depend on the exact details of
19+
# packing, zlib, etc. We'll assume that the regular rev-list and cat-file
20+
# machinery works and compare the --disk-usage output to that.
21+
# usage: disk_usage_slow <rev-list args>...
# Feeds the rev-list output to cat-file to get each object's
# %(objectsize:disk), then prints the sum of those sizes.
disk_usage_slow () {
22+
git rev-list --no-object-names "$@" |
23+
git cat-file --batch-check="%(objectsize:disk)" |
24+
perl -lne '$total += $_; END { print $total}'
25+
}
26+
27+
# check behavior with given rev-list options; note that
28+
# whitespace is not preserved in args
29+
# usage: check_du <rev-list args>...
# Registers three tests for the given arguments: one writes the expected
# total (from disk_usage_slow) to "expect", and the other two compare
# "git rev-list --disk-usage" against it, without and with
# --use-bitmap-index.
check_du () {
30+
args=$*
31+
32+
test_expect_success "generate expected size ($args)" "
33+
disk_usage_slow $args >expect
34+
"
35+
36+
test_expect_success "rev-list --disk-usage without bitmaps ($args)" "
37+
git rev-list --disk-usage $args >actual &&
38+
test_cmp expect actual
39+
"
40+
41+
test_expect_success "rev-list --disk-usage with bitmaps ($args)" "
42+
git rev-list --disk-usage --use-bitmap-index $args >actual &&
43+
test_cmp expect actual
44+
"
45+
}
46+
47+
check_du HEAD
48+
check_du --objects HEAD
49+
check_du --objects HEAD^..HEAD
50+
51+
test_done

0 commit comments

Comments
 (0)