Skip to content

Commit c813a7c

Browse files
matvore and gitster
authored and committed
list-objects-filter: teach tree:# how to handle >0
Implement positive values for <depth> in the tree:<depth> filter. The exact semantics are described in Documentation/rev-list-options.txt.

The long-term goal at the end of this is to allow a partial clone to eagerly fetch an entire directory of files by fetching a tree and specifying <depth>=1. This, for instance, would make a build operation fast and convenient. It is fast because the partial clone does not need to fetch each file individually, and convenient because the user does not need to supply a sparse-checkout specification.

Another way of considering this feature is as a way to reduce round-trips, since the client can get any number of levels of directories in a single request, rather than wait for each level of tree objects to come back, whose entries are used to construct a new request.

Signed-off-by: Matthew DeVore <[email protected]>
Reviewed-by: Jonathan Tan <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0aa9d8a commit c813a7c

File tree

5 files changed

+219
-27
lines changed

5 files changed

+219
-27
lines changed

Documentation/rev-list-options.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,13 @@ specification contained in <path>.
734734
+
735735
The form '--filter=tree:<depth>' omits all blobs and trees whose depth
736736
from the root tree is >= <depth> (minimum depth if an object is located
737-
at multiple depths in the commits traversed). Currently, only <depth>=0
738-
is supported, which omits all blobs and trees.
737+
at multiple depths in the commits traversed). <depth>=0 will not include
738+
any trees or blobs unless included explicitly in the command-line (or
739+
standard input when --stdin is used). <depth>=1 will include only the
740+
tree and blobs which are referenced directly by a commit reachable from
741+
<commit> or an explicitly-given object. <depth>=2 is like <depth>=1
742+
while also including trees and blobs one more level removed from an
743+
explicitly-given commit or tree.
739744

740745
--no-filter::
741746
Turn off any previous `--filter=` argument.

list-objects-filter-options.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,15 @@ static int gently_parse_list_objects_filter(
5050
}
5151

5252
} else if (skip_prefix(arg, "tree:", &v0)) {
53-
unsigned long depth;
54-
if (!git_parse_ulong(v0, &depth) || depth != 0) {
53+
if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) {
5554
if (errbuf) {
5655
strbuf_addstr(
5756
errbuf,
58-
_("only 'tree:0' is supported"));
57+
_("expected 'tree:<depth>'"));
5958
}
6059
return 1;
6160
}
62-
filter_options->choice = LOFC_TREE_NONE;
61+
filter_options->choice = LOFC_TREE_DEPTH;
6362
return 0;
6463

6564
} else if (skip_prefix(arg, "sparse:oid=", &v0)) {

list-objects-filter-options.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ enum list_objects_filter_choice {
1010
LOFC_DISABLED = 0,
1111
LOFC_BLOB_NONE,
1212
LOFC_BLOB_LIMIT,
13-
LOFC_TREE_NONE,
13+
LOFC_TREE_DEPTH,
1414
LOFC_SPARSE_OID,
1515
LOFC_SPARSE_PATH,
1616
LOFC__COUNT /* must be last */
@@ -44,6 +44,7 @@ struct list_objects_filter_options {
4444
struct object_id *sparse_oid_value;
4545
char *sparse_path_value;
4646
unsigned long blob_limit_value;
47+
unsigned long tree_exclude_depth;
4748
};
4849

4950
/* Normalized command line arguments */

list-objects-filter.c

Lines changed: 96 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "list-objects.h"
1111
#include "list-objects-filter.h"
1212
#include "list-objects-filter-options.h"
13+
#include "oidmap.h"
1314
#include "oidset.h"
1415
#include "object-store.h"
1516

@@ -84,55 +85,130 @@ static void *filter_blobs_none__init(
8485
* A filter for list-objects to omit ALL trees and blobs from the traversal.
8586
* Can OPTIONALLY collect a list of the omitted OIDs.
8687
*/
87-
struct filter_trees_none_data {
88+
struct filter_trees_depth_data {
8889
struct oidset *omits;
90+
91+
/*
92+
* Maps trees to the minimum depth at which they were seen. It is not
93+
* necessary to re-traverse a tree at deeper or equal depths than it has
94+
* already been traversed.
95+
*
96+
* We can't use LOFR_MARK_SEEN for tree objects since this will prevent
97+
* it from being traversed at shallower depths.
98+
*/
99+
struct oidmap seen_at_depth;
100+
101+
unsigned long exclude_depth;
102+
unsigned long current_depth;
89103
};
90104

91-
static enum list_objects_filter_result filter_trees_none(
105+
struct seen_map_entry {
106+
struct oidmap_entry base;
107+
size_t depth;
108+
};
109+
110+
static void filter_trees_update_omits(
111+
struct object *obj,
112+
struct filter_trees_depth_data *filter_data,
113+
int include_it)
114+
{
115+
if (!filter_data->omits)
116+
return;
117+
118+
if (include_it)
119+
oidset_remove(filter_data->omits, &obj->oid);
120+
else
121+
oidset_insert(filter_data->omits, &obj->oid);
122+
}
123+
124+
static enum list_objects_filter_result filter_trees_depth(
92125
struct repository *r,
93126
enum list_objects_filter_situation filter_situation,
94127
struct object *obj,
95128
const char *pathname,
96129
const char *filename,
97130
void *filter_data_)
98131
{
99-
struct filter_trees_none_data *filter_data = filter_data_;
132+
struct filter_trees_depth_data *filter_data = filter_data_;
133+
struct seen_map_entry *seen_info;
134+
int include_it = filter_data->current_depth <
135+
filter_data->exclude_depth;
136+
int filter_res;
137+
int already_seen;
138+
139+
/*
140+
* Note that we do not use _MARK_SEEN in order to allow re-traversal in
141+
* case we encounter a tree or blob again at a shallower depth.
142+
*/
100143

101144
switch (filter_situation) {
102145
default:
103146
BUG("unknown filter_situation: %d", filter_situation);
104147

105-
case LOFS_BEGIN_TREE:
148+
case LOFS_END_TREE:
149+
assert(obj->type == OBJ_TREE);
150+
filter_data->current_depth--;
151+
return LOFR_ZERO;
152+
106153
case LOFS_BLOB:
107-
if (filter_data->omits) {
108-
oidset_insert(filter_data->omits, &obj->oid);
109-
/* _MARK_SEEN but not _DO_SHOW (hard omit) */
110-
return LOFR_MARK_SEEN;
154+
filter_trees_update_omits(obj, filter_data, include_it);
155+
return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
156+
157+
case LOFS_BEGIN_TREE:
158+
seen_info = oidmap_get(
159+
&filter_data->seen_at_depth, &obj->oid);
160+
if (!seen_info) {
161+
seen_info = xcalloc(1, sizeof(*seen_info));
162+
oidcpy(&seen_info->base.oid, &obj->oid);
163+
seen_info->depth = filter_data->current_depth;
164+
oidmap_put(&filter_data->seen_at_depth, seen_info);
165+
already_seen = 0;
111166
} else {
112-
/*
113-
* Not collecting omits so no need to to traverse tree.
114-
*/
115-
return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
167+
already_seen =
168+
filter_data->current_depth >= seen_info->depth;
116169
}
117170

118-
case LOFS_END_TREE:
119-
assert(obj->type == OBJ_TREE);
120-
return LOFR_ZERO;
171+
if (already_seen) {
172+
filter_res = LOFR_SKIP_TREE;
173+
} else {
174+
seen_info->depth = filter_data->current_depth;
175+
filter_trees_update_omits(obj, filter_data, include_it);
176+
177+
if (include_it)
178+
filter_res = LOFR_DO_SHOW;
179+
else if (filter_data->omits)
180+
filter_res = LOFR_ZERO;
181+
else
182+
filter_res = LOFR_SKIP_TREE;
183+
}
121184

185+
filter_data->current_depth++;
186+
return filter_res;
122187
}
123188
}
124189

125-
static void* filter_trees_none__init(
190+
static void filter_trees_free(void *filter_data) {
191+
struct filter_trees_depth_data *d = filter_data;
192+
if (!d)
193+
return;
194+
oidmap_free(&d->seen_at_depth, 1);
195+
free(d);
196+
}
197+
198+
static void *filter_trees_depth__init(
126199
struct oidset *omitted,
127200
struct list_objects_filter_options *filter_options,
128201
filter_object_fn *filter_fn,
129202
filter_free_fn *filter_free_fn)
130203
{
131-
struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
204+
struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
132205
d->omits = omitted;
206+
oidmap_init(&d->seen_at_depth, 0);
207+
d->exclude_depth = filter_options->tree_exclude_depth;
208+
d->current_depth = 0;
133209

134-
*filter_fn = filter_trees_none;
135-
*filter_free_fn = free;
210+
*filter_fn = filter_trees_depth;
211+
*filter_free_fn = filter_trees_free;
136212
return d;
137213
}
138214

@@ -430,7 +506,7 @@ static filter_init_fn s_filters[] = {
430506
NULL,
431507
filter_blobs_none__init,
432508
filter_blobs_limit__init,
433-
filter_trees_none__init,
509+
filter_trees_depth__init,
434510
filter_sparse_oid__init,
435511
filter_sparse_path__init,
436512
};

t/t6112-rev-list-filters-objects.sh

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,117 @@ test_expect_success 'filter a GIANT tree through tree:0' '
294294
! grep "Skipping contents of tree [^.]" filter_trace
295295
'
296296

297+
# Test tree:# filters.
298+
299+
expect_has () {
300+
commit=$1 &&
301+
name=$2 &&
302+
303+
hash=$(git -C r3 rev-parse $commit:$name) &&
304+
grep "^$hash $name$" actual
305+
}
306+
307+
test_expect_success 'verify tree:1 includes root trees' '
308+
git -C r3 rev-list --objects --filter=tree:1 HEAD >actual &&
309+
310+
# We should get two root directories and two commits.
311+
expect_has HEAD "" &&
312+
expect_has HEAD~1 "" &&
313+
test_line_count = 4 actual
314+
'
315+
316+
test_expect_success 'verify tree:2 includes root trees and immediate children' '
317+
git -C r3 rev-list --objects --filter=tree:2 HEAD >actual &&
318+
319+
expect_has HEAD "" &&
320+
expect_has HEAD~1 "" &&
321+
expect_has HEAD dir1 &&
322+
expect_has HEAD pattern &&
323+
expect_has HEAD sparse1 &&
324+
expect_has HEAD sparse2 &&
325+
326+
# There are also 2 commit objects
327+
test_line_count = 8 actual
328+
'
329+
330+
test_expect_success 'verify tree:3 includes everything expected' '
331+
git -C r3 rev-list --objects --filter=tree:3 HEAD >actual &&
332+
333+
expect_has HEAD "" &&
334+
expect_has HEAD~1 "" &&
335+
expect_has HEAD dir1 &&
336+
expect_has HEAD dir1/sparse1 &&
337+
expect_has HEAD dir1/sparse2 &&
338+
expect_has HEAD pattern &&
339+
expect_has HEAD sparse1 &&
340+
expect_has HEAD sparse2 &&
341+
342+
# There are also 2 commit objects
343+
test_line_count = 10 actual
344+
'
345+
346+
# Test provisional omit collection logic with a repo that has objects appearing
347+
# at multiple depths - first deeper than the filter's threshold, then shallow.
348+
349+
test_expect_success 'setup r4' '
350+
git init r4 &&
351+
352+
echo foo > r4/foo &&
353+
mkdir r4/subdir &&
354+
echo bar > r4/subdir/bar &&
355+
356+
mkdir r4/filt &&
357+
cp -r r4/foo r4/subdir r4/filt &&
358+
359+
git -C r4 add foo subdir filt &&
360+
git -C r4 commit -m "commit msg"
361+
'
362+
363+
expect_has_with_different_name () {
364+
repo=$1 &&
365+
name=$2 &&
366+
367+
hash=$(git -C $repo rev-parse HEAD:$name) &&
368+
! grep "^$hash $name$" actual &&
369+
grep "^$hash " actual &&
370+
! grep "~$hash" actual
371+
}
372+
373+
test_expect_success 'test tree:# filter provisional omit for blob and tree' '
374+
git -C r4 rev-list --objects --filter-print-omitted --filter=tree:2 \
375+
HEAD >actual &&
376+
expect_has_with_different_name r4 filt/foo &&
377+
expect_has_with_different_name r4 filt/subdir
378+
'
379+
380+
# Test tree:<depth> where a tree is iterated to twice - once where a subentry is
381+
# too deep to be included, and again where the blob inside it is shallow enough
382+
# to be included. This makes sure we don't use LOFR_MARK_SEEN incorrectly (we
383+
# can't use it because a tree can be iterated over again at a lower depth).
384+
385+
test_expect_success 'tree:<depth> where we iterate over tree at two levels' '
386+
git init r5 &&
387+
388+
mkdir -p r5/a/subdir/b &&
389+
echo foo > r5/a/subdir/b/foo &&
390+
391+
mkdir -p r5/subdir/b &&
392+
echo foo > r5/subdir/b/foo &&
393+
394+
git -C r5 add a subdir &&
395+
git -C r5 commit -m "commit msg" &&
396+
397+
git -C r5 rev-list --objects --filter=tree:4 HEAD >actual &&
398+
expect_has_with_different_name r5 a/subdir/b/foo
399+
'
400+
401+
test_expect_success 'tree:<depth> which filters out blob but given as arg' '
402+
blob_hash=$(git -C r4 rev-parse HEAD:subdir/bar) &&
403+
404+
git -C r4 rev-list --objects --filter=tree:1 HEAD $blob_hash >actual &&
405+
grep ^$blob_hash actual
406+
'
407+
297408
# Delete some loose objects and use rev-list, but WITHOUT any filtering.
298409
# This models previously omitted objects that we did not receive.
299410

0 commit comments

Comments
 (0)