Skip to content

Commit bc5975d

Browse files
matvore authored and gitster committed
list-objects-filter: implement filter tree:0
Teach list-objects the "tree:0" filter which allows for filtering out all tree and blob objects (unless other objects are explicitly specified by the user). The purpose of this patch is to allow smaller partial clones.

The name of this filter - tree:0 - does not explicitly specify that it also filters out all blobs, but this should not cause much confusion because blobs are not at all useful without the trees that refer to them.

I also considered only:commits as a name, but this is inaccurate because it suggests that annotated tags are omitted, but actually they are included.

The name "tree:0" allows later filtering based on depth, i.e. "tree:1" would filter out all but the root tree and blobs. In order to avoid confusion between 0 and capital O, the documentation was worded in a somewhat round-about way that also hints at this future improvement to the feature.

Signed-off-by: Matthew DeVore <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent cc0b05a commit bc5975d

7 files changed

+153
-0
lines changed

Documentation/rev-list-options.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,11 @@ the requested refs.
731731
+
732732
The form '--filter=sparse:path=<path>' similarly uses a sparse-checkout
733733
specification contained in <path>.
734+
+
735+
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
736+
from the root tree is >= <depth> (minimum depth if an object is located
737+
at multiple depths in the commits traversed). Currently, only <depth>=0
738+
is supported, which omits all blobs and trees.
734739

735740
--no-filter::
736741
Turn off any previous `--filter=` argument.

list-objects-filter-options.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,19 @@ static int gently_parse_list_objects_filter(
4949
return 0;
5050
}
5151

52+
} else if (skip_prefix(arg, "tree:", &v0)) {
53+
unsigned long depth;
54+
if (!git_parse_ulong(v0, &depth) || depth != 0) {
55+
if (errbuf) {
56+
strbuf_addstr(
57+
errbuf,
58+
_("only 'tree:0' is supported"));
59+
}
60+
return 1;
61+
}
62+
filter_options->choice = LOFC_TREE_NONE;
63+
return 0;
64+
5265
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {
5366
struct object_context oc;
5467
struct object_id sparse_oid;

list-objects-filter-options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ enum list_objects_filter_choice {
1010
LOFC_DISABLED = 0,
1111
LOFC_BLOB_NONE,
1212
LOFC_BLOB_LIMIT,
13+
LOFC_TREE_NONE,
1314
LOFC_SPARSE_OID,
1415
LOFC_SPARSE_PATH,
1516
LOFC__COUNT /* must be last */

list-objects-filter.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,54 @@ static void *filter_blobs_none__init(
7979
return d;
8080
}
8181

82+
/*
 * State for the "tree:0" list-objects filter, which leaves every tree and
 * every blob out of the traversal.
 */
struct filter_trees_none_data {
	/* Optional: when non-NULL, receives the OID of each omitted object. */
	struct oidset *omits;
};
89+
90+
static enum list_objects_filter_result filter_trees_none(
91+
enum list_objects_filter_situation filter_situation,
92+
struct object *obj,
93+
const char *pathname,
94+
const char *filename,
95+
void *filter_data_)
96+
{
97+
struct filter_trees_none_data *filter_data = filter_data_;
98+
99+
switch (filter_situation) {
100+
default:
101+
BUG("unknown filter_situation: %d", filter_situation);
102+
103+
case LOFS_BEGIN_TREE:
104+
case LOFS_BLOB:
105+
if (filter_data->omits)
106+
oidset_insert(filter_data->omits, &obj->oid);
107+
return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
108+
109+
case LOFS_END_TREE:
110+
assert(obj->type == OBJ_TREE);
111+
return LOFR_ZERO;
112+
113+
}
114+
}
115+
116+
static void* filter_trees_none__init(
117+
struct oidset *omitted,
118+
struct list_objects_filter_options *filter_options,
119+
filter_object_fn *filter_fn,
120+
filter_free_fn *filter_free_fn)
121+
{
122+
struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
123+
d->omits = omitted;
124+
125+
*filter_fn = filter_trees_none;
126+
*filter_free_fn = free;
127+
return d;
128+
}
129+
82130
/*
83131
* A filter for list-objects to omit large blobs.
84132
* And to OPTIONALLY collect a list of the omitted OIDs.
@@ -371,6 +419,7 @@ static filter_init_fn s_filters[] = {
371419
NULL,
372420
filter_blobs_none__init,
373421
filter_blobs_limit__init,
422+
filter_trees_none__init,
374423
filter_sparse_oid__init,
375424
filter_sparse_path__init,
376425
};

t/t5317-pack-objects-filter-objects.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,34 @@ test_expect_success 'get an error for missing tree object' '
7272
grep -q "bad tree object" bad_tree
7373
'
7474

75+
test_expect_success 'setup for tests of tree:0' '
	# Add a file below a new directory in r1 and commit it.
	mkdir r1/subtree &&
	echo "This is a file in a subtree" >r1/subtree/file &&
	git -C r1 add subtree/file &&
	git -C r1 commit -m "subtree"
'
81+
82+
# Pack everything reachable from HEAD with --filter=tree:0 and check that
# the resulting pack lists neither trees nor blobs.
test_expect_success 'verify tree:0 packfile has no blobs or trees' '
	# Spell out --revs in full: an abbreviated option only works while
	# the prefix stays unique among the options of the command.
	git -C r1 pack-objects --revs --stdout --filter=tree:0 >commitsonly.pack <<-EOF &&
	HEAD
	EOF
	git -C r1 index-pack ../commitsonly.pack &&
	git -C r1 verify-pack -v ../commitsonly.pack >objs &&
	! grep -E "tree|blob" objs
'
90+
91+
# Name the root tree of HEAD explicitly on stdin and check that it is the
# only tree or blob listed in the resulting pack.
test_expect_success 'grab tree directly when using tree:0' '
	# We should get the tree specified directly but not its blobs or subtrees.
	# Spell out --revs in full: an abbreviated option only works while
	# the prefix stays unique among the options of the command.
	git -C r1 pack-objects --revs --stdout --filter=tree:0 >commitsonly.pack <<-EOF &&
	HEAD:
	EOF
	git -C r1 index-pack ../commitsonly.pack &&
	git -C r1 verify-pack -v ../commitsonly.pack >objs &&
	awk "/tree|blob/{print \$1}" objs >trees_and_blobs &&
	git -C r1 rev-parse HEAD: >expected &&
	test_cmp expected trees_and_blobs
'
102+
75103
# Test blob:limit=<n>[kmg] filter.
76104
# We boundary test around the size parameter. The filter is strictly less than
77105
# the value, so size 500 and 1000 should have the same results, but 1001 should

t/t5616-partial-clone.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,48 @@ test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack -
154154
grep "git index-pack.*--fsck-objects" trace
155155
'
156156

157+
test_expect_success 'use fsck before and after manually fetching a missing subtree' '
	# Push a new commit so that the server has a subtree.
	mkdir src/dir &&
	echo "in dir" >src/dir/file.txt &&
	git -C src add dir/file.txt &&
	git -C src commit -m "file in dir" &&
	git -C src push -u srv master &&
	SUBTREE=$(git -C src rev-parse HEAD:dir) &&

	# Start from a fresh partial clone that filters with tree:0.
	rm -rf dst &&
	git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst &&
	git -C dst fsck &&

	# Right after cloning, every object listed by rev-list must be a
	# commit; all trees and blobs are missing.
	echo commit >unique_types.expected &&
	git -C dst rev-list --missing=allow-any --objects master \
		>fetched_objects &&
	awk -f print_1.awk fetched_objects |
	xargs -n1 git -C dst cat-file -t >fetched_types &&
	sort -u fetched_types >unique_types.observed &&
	test_cmp unique_types.expected unique_types.observed &&

	# Auto-fetch a tree with cat-file.
	git -C dst cat-file -p $SUBTREE >tree_contents &&
	grep file.txt tree_contents &&

	# fsck still works after an auto-fetch of a tree.
	git -C dst fsck &&

	# Auto-fetch all remaining trees and blobs with --missing=error;
	# afterwards all three object types must show up.
	git -C dst rev-list --missing=error --objects master >fetched_objects &&
	test_line_count = 70 fetched_objects &&

	printf "blob\ncommit\ntree\n" >unique_types.expected &&
	awk -f print_1.awk fetched_objects |
	xargs -n1 git -C dst cat-file -t >fetched_types &&
	sort -u fetched_types >unique_types.observed &&
	test_cmp unique_types.expected unique_types.observed
'
198+
157199
test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
158200
rm -rf src dst &&
159201
git init src &&

t/t6112-rev-list-filters-objects.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,21 @@ test_expect_success 'rev-list W/ --missing=print and --missing=allow-any for tre
230230
test_must_be_empty rev_list_err
231231
'
232232

233+
# Test tree:0 filter.
234+
235+
test_expect_success 'verify tree:0 includes trees in "filtered" output' '
	# The omitted objects must consist of blobs and trees only.
	printf "blob\ntree\n" >expected &&

	git -C r3 rev-list --quiet --objects --filter-print-omitted \
		--filter=tree:0 HEAD >revs &&

	# Remove the "~" from each printed name, then look up each type.
	awk -f print_1.awk revs |
	sed "s/~//" |
	xargs -n1 git -C r3 cat-file -t >unsorted_filtered_types &&

	sort -u unsorted_filtered_types >filtered_types &&
	test_cmp expected filtered_types
'
247+
233248
# Delete some loose objects and use rev-list, but WITHOUT any filtering.
234249
# This models previously omitted objects that we did not receive.
235250

0 commit comments

Comments
 (0)