Skip to content

Commit e987df5

Browse files
matvore authored and gitster committed
list-objects-filter: implement composite filters
Allow combining filters such that only objects accepted by all filters are shown. The motivation for this is to allow getting directory listings without also fetching blobs. This can be done by combining blob:none with tree:<depth>. There are massive repositories that have larger-than-expected trees - even if you include only a single commit.

A combined filter supports any number of subfilters, and is written in the following form:

    combine:<filter 1>+<filter 2>+<filter 3>

Certain non-alphanumeric characters in each filter must be URL-encoded.

For now, combined filters must be specified in this form. In a subsequent commit, rev-list will support multiple --filter arguments which will have the same effect as specifying one filter argument starting with "combine:". The documentation will be updated in that commit, as the URL-encoding scheme is in general not meant to be used directly by the user, and it is better to describe the URL-encoding feature in terms of the repeated flag.

Helped-by: Emily Shaffer <[email protected]>
Helped-by: Jeff Hostetler <[email protected]>
Helped-by: Johannes Schindelin <[email protected]>
Helped-by: Jonathan Tan <[email protected]>
Helped-by: Junio C Hamano <[email protected]>
Signed-off-by: Matthew DeVore <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 842b005 commit e987df5

7 files changed

+454
-8
lines changed

list-objects-filter-options.c

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
#include "list-objects.h"
77
#include "list-objects-filter.h"
88
#include "list-objects-filter-options.h"
9+
#include "url.h"
10+
11+
static int parse_combine_filter(
12+
struct list_objects_filter_options *filter_options,
13+
const char *arg,
14+
struct strbuf *errbuf);
915

1016
/*
1117
* Parse value of the argument to the "filter" keyword.
@@ -35,8 +41,6 @@ static int gently_parse_list_objects_filter(
3541
return 1;
3642
}
3743

38-
filter_options->filter_spec = strdup(arg);
39-
4044
if (!strcmp(arg, "blob:none")) {
4145
filter_options->choice = LOFC_BLOB_NONE;
4246
return 0;
@@ -77,6 +81,10 @@ static int gently_parse_list_objects_filter(
7781
_("sparse:path filters support has been dropped"));
7882
}
7983
return 1;
84+
85+
} else if (skip_prefix(arg, "combine:", &v0)) {
86+
return parse_combine_filter(filter_options, v0, errbuf);
87+
8088
}
8189
/*
8290
* Please update _git_fetch() in git-completion.bash when you
@@ -89,10 +97,95 @@ static int gently_parse_list_objects_filter(
8997
return 1;
9098
}
9199

100+
static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?";
101+
102+
static int has_reserved_character(
103+
struct strbuf *sub_spec, struct strbuf *errbuf)
104+
{
105+
const char *c = sub_spec->buf;
106+
while (*c) {
107+
if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) {
108+
strbuf_addf(
109+
errbuf,
110+
_("must escape char in sub-filter-spec: '%c'"),
111+
*c);
112+
return 1;
113+
}
114+
c++;
115+
}
116+
117+
return 0;
118+
}
119+
120+
static int parse_combine_subfilter(
121+
struct list_objects_filter_options *filter_options,
122+
struct strbuf *subspec,
123+
struct strbuf *errbuf)
124+
{
125+
size_t new_index = filter_options->sub_nr++;
126+
char *decoded;
127+
int result;
128+
129+
ALLOC_GROW(filter_options->sub, filter_options->sub_nr,
130+
filter_options->sub_alloc);
131+
memset(&filter_options->sub[new_index], 0,
132+
sizeof(*filter_options->sub));
133+
134+
decoded = url_percent_decode(subspec->buf);
135+
136+
result = has_reserved_character(subspec, errbuf) ||
137+
gently_parse_list_objects_filter(
138+
&filter_options->sub[new_index], decoded, errbuf);
139+
140+
free(decoded);
141+
return result;
142+
}
143+
144+
static int parse_combine_filter(
145+
struct list_objects_filter_options *filter_options,
146+
const char *arg,
147+
struct strbuf *errbuf)
148+
{
149+
struct strbuf **subspecs = strbuf_split_str(arg, '+', 0);
150+
size_t sub;
151+
int result = 0;
152+
153+
if (!subspecs[0]) {
154+
strbuf_addstr(errbuf, _("expected something after combine:"));
155+
result = 1;
156+
goto cleanup;
157+
}
158+
159+
for (sub = 0; subspecs[sub] && !result; sub++) {
160+
if (subspecs[sub + 1]) {
161+
/*
162+
* This is not the last subspec. Remove trailing "+" so
163+
* we can parse it.
164+
*/
165+
size_t last = subspecs[sub]->len - 1;
166+
assert(subspecs[sub]->buf[last] == '+');
167+
strbuf_remove(subspecs[sub], last, 1);
168+
}
169+
result = parse_combine_subfilter(
170+
filter_options, subspecs[sub], errbuf);
171+
}
172+
173+
filter_options->choice = LOFC_COMBINE;
174+
175+
cleanup:
176+
strbuf_list_free(subspecs);
177+
if (result) {
178+
list_objects_filter_release(filter_options);
179+
memset(filter_options, 0, sizeof(*filter_options));
180+
}
181+
return result;
182+
}
183+
92184
int parse_list_objects_filter(struct list_objects_filter_options *filter_options,
93185
const char *arg)
94186
{
95187
struct strbuf buf = STRBUF_INIT;
188+
filter_options->filter_spec = strdup(arg);
96189
if (gently_parse_list_objects_filter(filter_options, arg, &buf))
97190
die("%s", buf.buf);
98191
return 0;
@@ -129,8 +222,15 @@ void expand_list_objects_filter_spec(
129222
void list_objects_filter_release(
130223
struct list_objects_filter_options *filter_options)
131224
{
225+
size_t sub;
226+
227+
if (!filter_options)
228+
return;
132229
free(filter_options->filter_spec);
133230
free(filter_options->sparse_oid_value);
231+
for (sub = 0; sub < filter_options->sub_nr; sub++)
232+
list_objects_filter_release(&filter_options->sub[sub]);
233+
free(filter_options->sub);
134234
memset(filter_options, 0, sizeof(*filter_options));
135235
}
136236

@@ -174,6 +274,8 @@ void partial_clone_get_default_filter_spec(
174274
*/
175275
if (!core_partial_clone_filter_default)
176276
return;
277+
278+
filter_options->filter_spec = strdup(core_partial_clone_filter_default);
177279
gently_parse_list_objects_filter(filter_options,
178280
core_partial_clone_filter_default,
179281
&errbuf);

list-objects-filter-options.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum list_objects_filter_choice {
1313
LOFC_BLOB_LIMIT,
1414
LOFC_TREE_DEPTH,
1515
LOFC_SPARSE_OID,
16+
LOFC_COMBINE,
1617
LOFC__COUNT /* must be last */
1718
};
1819

@@ -38,13 +39,23 @@ struct list_objects_filter_options {
3839
unsigned int no_filter : 1;
3940

4041
/*
41-
* Parsed values (fields) from within the filter-spec. These are
42-
* choice-specific; not all values will be defined for any given
43-
* choice.
42+
* BEGIN choice-specific parsed values from within the filter-spec. Only
43+
* some values will be defined for any given choice.
4444
*/
45+
4546
struct object_id *sparse_oid_value;
4647
unsigned long blob_limit_value;
4748
unsigned long tree_exclude_depth;
49+
50+
/* LOFC_COMBINE values */
51+
52+
/* This array contains all the subfilters which this filter combines. */
53+
size_t sub_nr, sub_alloc;
54+
struct list_objects_filter_options *sub;
55+
56+
/*
57+
* END choice-specific parsed values.
58+
*/
4859
};
4960

5061
/* Normalized command line arguments */

list-objects-filter.c

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@
2626
*/
2727
#define FILTER_SHOWN_BUT_REVISIT (1<<21)
2828

29+
struct subfilter {
30+
struct filter *filter;
31+
struct oidset seen;
32+
struct oidset omits;
33+
struct object_id skip_tree;
34+
unsigned is_skipping_tree : 1;
35+
};
36+
2937
struct filter {
3038
enum list_objects_filter_result (*filter_object_fn)(
3139
struct repository *r,
@@ -36,6 +44,23 @@ struct filter {
3644
struct oidset *omits,
3745
void *filter_data);
3846

47+
/*
48+
* Optional. If this function is supplied and the filter needs
49+
* to collect omits, then this function is called once before
50+
* free_fn is called.
51+
*
52+
* This is required because the following two conditions hold:
53+
*
54+
* a. A tree filter can add and remove objects as an object
55+
* graph is traversed.
56+
* b. A combine filter's omit set is the union of all its
57+
* subfilters, which may include tree: filters.
58+
*
59+
* As such, the omits sets must be separate sets, and can only
60+
* be unioned after the traversal is completed.
61+
*/
62+
void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);
63+
3964
void (*free_fn)(void *filter_data);
4065

4166
void *filter_data;
@@ -471,6 +496,139 @@ static void filter_sparse_oid__init(
471496
filter->free_fn = filter_sparse_free;
472497
}
473498

499+
/*
 * State of a combine filter: a filter which only shows objects shown by
 * all of its sub-filters.
 */
struct combine_filter_data {
	/* Array of `nr` sub-filter states, one per combined sub-filter. */
	struct subfilter *sub;
	size_t nr;
};
504+
505+
static enum list_objects_filter_result process_subfilter(
506+
struct repository *r,
507+
enum list_objects_filter_situation filter_situation,
508+
struct object *obj,
509+
const char *pathname,
510+
const char *filename,
511+
struct subfilter *sub)
512+
{
513+
enum list_objects_filter_result result;
514+
515+
/*
516+
* Check and update is_skipping_tree before oidset_contains so
517+
* that is_skipping_tree gets unset even when the object is
518+
* marked as seen. As of this writing, no filter uses
519+
* LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
520+
* ordering is only theoretically important. Be cautious if you
521+
* change the order of the below checks and more filters have
522+
* been added!
523+
*/
524+
if (sub->is_skipping_tree) {
525+
if (filter_situation == LOFS_END_TREE &&
526+
oideq(&obj->oid, &sub->skip_tree))
527+
sub->is_skipping_tree = 0;
528+
else
529+
return LOFR_ZERO;
530+
}
531+
if (oidset_contains(&sub->seen, &obj->oid))
532+
return LOFR_ZERO;
533+
534+
result = list_objects_filter__filter_object(
535+
r, filter_situation, obj, pathname, filename, sub->filter);
536+
537+
if (result & LOFR_MARK_SEEN)
538+
oidset_insert(&sub->seen, &obj->oid);
539+
540+
if (result & LOFR_SKIP_TREE) {
541+
sub->is_skipping_tree = 1;
542+
sub->skip_tree = obj->oid;
543+
}
544+
545+
return result;
546+
}
547+
548+
static enum list_objects_filter_result filter_combine(
549+
struct repository *r,
550+
enum list_objects_filter_situation filter_situation,
551+
struct object *obj,
552+
const char *pathname,
553+
const char *filename,
554+
struct oidset *omits,
555+
void *filter_data)
556+
{
557+
struct combine_filter_data *d = filter_data;
558+
enum list_objects_filter_result combined_result =
559+
LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;
560+
size_t sub;
561+
562+
for (sub = 0; sub < d->nr; sub++) {
563+
enum list_objects_filter_result sub_result = process_subfilter(
564+
r, filter_situation, obj, pathname, filename,
565+
&d->sub[sub]);
566+
if (!(sub_result & LOFR_DO_SHOW))
567+
combined_result &= ~LOFR_DO_SHOW;
568+
if (!(sub_result & LOFR_MARK_SEEN))
569+
combined_result &= ~LOFR_MARK_SEEN;
570+
if (!d->sub[sub].is_skipping_tree)
571+
combined_result &= ~LOFR_SKIP_TREE;
572+
}
573+
574+
return combined_result;
575+
}
576+
577+
static void filter_combine__free(void *filter_data)
578+
{
579+
struct combine_filter_data *d = filter_data;
580+
size_t sub;
581+
for (sub = 0; sub < d->nr; sub++) {
582+
list_objects_filter__free(d->sub[sub].filter);
583+
oidset_clear(&d->sub[sub].seen);
584+
if (d->sub[sub].omits.set.size)
585+
BUG("expected oidset to be cleared already");
586+
}
587+
free(d->sub);
588+
}
589+
590+
static void add_all(struct oidset *dest, struct oidset *src) {
591+
struct oidset_iter iter;
592+
struct object_id *src_oid;
593+
594+
oidset_iter_init(src, &iter);
595+
while ((src_oid = oidset_iter_next(&iter)) != NULL)
596+
oidset_insert(dest, src_oid);
597+
}
598+
599+
static void filter_combine__finalize_omits(
600+
struct oidset *omits,
601+
void *filter_data)
602+
{
603+
struct combine_filter_data *d = filter_data;
604+
size_t sub;
605+
606+
for (sub = 0; sub < d->nr; sub++) {
607+
add_all(omits, &d->sub[sub].omits);
608+
oidset_clear(&d->sub[sub].omits);
609+
}
610+
}
611+
612+
static void filter_combine__init(
613+
struct list_objects_filter_options *filter_options,
614+
struct filter* filter)
615+
{
616+
struct combine_filter_data *d = xcalloc(1, sizeof(*d));
617+
size_t sub;
618+
619+
d->nr = filter_options->sub_nr;
620+
d->sub = xcalloc(d->nr, sizeof(*d->sub));
621+
for (sub = 0; sub < d->nr; sub++)
622+
d->sub[sub].filter = list_objects_filter__init(
623+
filter->omits ? &d->sub[sub].omits : NULL,
624+
&filter_options->sub[sub]);
625+
626+
filter->filter_data = d;
627+
filter->filter_object_fn = filter_combine;
628+
filter->free_fn = filter_combine__free;
629+
filter->finalize_omits_fn = filter_combine__finalize_omits;
630+
}
631+
474632
typedef void (*filter_init_fn)(
475633
struct list_objects_filter_options *filter_options,
476634
struct filter *filter);
@@ -484,6 +642,7 @@ static filter_init_fn s_filters[] = {
484642
filter_blobs_limit__init,
485643
filter_trees_depth__init,
486644
filter_sparse_oid__init,
645+
filter_combine__init,
487646
};
488647

489648
struct filter *list_objects_filter__init(
@@ -536,6 +695,8 @@ void list_objects_filter__free(struct filter *filter)
536695
{
537696
if (!filter)
538697
return;
698+
if (filter->finalize_omits_fn && filter->omits)
699+
filter->finalize_omits_fn(filter->omits, filter->filter_data);
539700
filter->free_fn(filter->filter_data);
540701
free(filter);
541702
}

0 commit comments

Comments
 (0)