Skip to content

Commit fbf66a7

Browse files
committed
Merge branch 'ps/cat-file-filter-batch' into jch
"git cat-file --batch" and friends learned to allow "--filter=" to omit certain objects, just like the transport layer does.

Comments?

* ps/cat-file-filter-batch:
  - builtin/cat-file: use bitmaps to efficiently filter by object type
  - builtin/cat-file: deduplicate logic to iterate over all objects
  - pack-bitmap: introduce function to check whether a pack is bitmapped
  - pack-bitmap: add function to iterate over filtered bitmapped objects
  - pack-bitmap: allow passing payloads to `show_reachable_fn()`
  - builtin/cat-file: support "object:type=" objects filter
  - builtin/cat-file: support "blob:limit=" objects filter
  - builtin/cat-file: support "blob:none" objects filter
  - builtin/cat-file: wire up an option to filter objects
  - builtin/cat-file: rename variable that tracks usage
2 parents 962ffbb + 04d3676 commit fbf66a7

File tree

8 files changed

+346
-71
lines changed

8 files changed

+346
-71
lines changed

Documentation/git-cat-file.adoc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,22 @@ OPTIONS
8181
end-of-line conversion, etc). In this case, `<object>` has to be of
8282
the form `<tree-ish>:<path>`, or `:<path>`.
8383

84+
--filter=<filter-spec>::
85+
--no-filter::
86+
Omit objects from the list of printed objects. This can only be used in
87+
combination with one of the batched modes. The '<filter-spec>' may be
88+
one of the following:
89+
+
90+
The form '--filter=blob:none' omits all blobs.
91+
+
92+
The form '--filter=blob:limit=<n>[kmg]' omits blobs of size at least n
93+
bytes or units. n may be zero. The suffixes k, m, and g can be used to name
94+
units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as
95+
'blob:limit=1024'.
96+
+
97+
The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects which
98+
are not of the requested type.
99+
84100
--path=<path>::
85101
For use with `--textconv` or `--filters`, to allow specifying an object
86102
name and a path separately, e.g. when it is difficult to figure out

builtin/cat-file.c

Lines changed: 154 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
#include "gettext.h"
1616
#include "hex.h"
1717
#include "ident.h"
18+
#include "list-objects-filter-options.h"
1819
#include "parse-options.h"
1920
#include "userdiff.h"
2021
#include "streaming.h"
2122
#include "oid-array.h"
2223
#include "packfile.h"
24+
#include "pack-bitmap.h"
2325
#include "object-file.h"
2426
#include "object-name.h"
2527
#include "object-store-ll.h"
@@ -47,6 +49,7 @@ enum batch_mode {
4749
};
4850

4951
struct batch_options {
52+
struct list_objects_filter_options objects_filter;
5053
int enabled;
5154
int follow_symlinks;
5255
enum batch_mode batch_mode;
@@ -484,8 +487,13 @@ static void batch_object_write(const char *obj_name,
484487
if (!data->skip_object_info) {
485488
int ret;
486489

487-
if (use_mailmap)
490+
if (use_mailmap ||
491+
opt->objects_filter.choice == LOFC_BLOB_NONE ||
492+
opt->objects_filter.choice == LOFC_BLOB_LIMIT ||
493+
opt->objects_filter.choice == LOFC_OBJECT_TYPE)
488494
data->info.typep = &data->type;
495+
if (opt->objects_filter.choice == LOFC_BLOB_LIMIT)
496+
data->info.sizep = &data->size;
489497

490498
if (pack)
491499
ret = packed_object_info(the_repository, pack, offset,
@@ -501,6 +509,26 @@ static void batch_object_write(const char *obj_name,
501509
return;
502510
}
503511

512+
switch (opt->objects_filter.choice) {
513+
case LOFC_DISABLED:
514+
break;
515+
case LOFC_BLOB_NONE:
516+
if (data->type == OBJ_BLOB)
517+
return;
518+
break;
519+
case LOFC_BLOB_LIMIT:
520+
if (data->type == OBJ_BLOB &&
521+
data->size >= opt->objects_filter.blob_limit_value)
522+
return;
523+
break;
524+
case LOFC_OBJECT_TYPE:
525+
if (data->type != opt->objects_filter.object_type)
526+
return;
527+
break;
528+
default:
529+
BUG("unsupported objects filter");
530+
}
531+
504532
if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) {
505533
size_t s = data->size;
506534
char *buf = NULL;
@@ -664,25 +692,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata)
664692
return 0;
665693
}
666694

667-
static int collect_loose_object(const struct object_id *oid,
668-
const char *path UNUSED,
669-
void *data)
670-
{
671-
oid_array_append(data, oid);
672-
return 0;
673-
}
674-
675-
static int collect_packed_object(const struct object_id *oid,
676-
struct packed_git *pack UNUSED,
677-
uint32_t pos UNUSED,
678-
void *data)
695+
/*
 * Iteration callback that records every visited object ID.
 *
 * `data` is the `struct oid_array` handed in by the caller; the pack and
 * offset arguments are ignored, so the same callback serves both loose and
 * packed objects. Always returns 0.
 */
static int collect_object(const struct object_id *oid,
696+
struct packed_git *pack UNUSED,
697+
off_t offset UNUSED,
698+
void *data)
679699
{
680700
oid_array_append(data, oid);
681701
return 0;
682702
}
683703

684704
static int batch_unordered_object(const struct object_id *oid,
685-
struct packed_git *pack, off_t offset,
705+
struct packed_git *pack,
706+
off_t offset,
686707
void *vdata)
687708
{
688709
struct object_cb_data *data = vdata;
@@ -696,23 +717,6 @@ static int batch_unordered_object(const struct object_id *oid,
696717
return 0;
697718
}
698719

699-
static int batch_unordered_loose(const struct object_id *oid,
700-
const char *path UNUSED,
701-
void *data)
702-
{
703-
return batch_unordered_object(oid, NULL, 0, data);
704-
}
705-
706-
static int batch_unordered_packed(const struct object_id *oid,
707-
struct packed_git *pack,
708-
uint32_t pos,
709-
void *data)
710-
{
711-
return batch_unordered_object(oid, pack,
712-
nth_packed_object_offset(pack, pos),
713-
data);
714-
}
715-
716720
typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
717721
struct strbuf *, struct expand_data *);
718722

@@ -885,6 +889,76 @@ static void batch_objects_command(struct batch_options *opt,
885889

886890
#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
887891

892+
/*
 * Unified per-object callback used by batch_each_object().
 *
 * In this file loose objects are reported with `pack == NULL` and
 * `offset == 0`; packed objects carry their pack and the offset within it.
 * `data` is the caller-supplied payload.
 */
typedef int (*for_each_object_fn)(const struct object_id *oid, struct packed_git *pack,
893+
off_t offset, void *data);
894+
895+
/*
 * Bundles a for_each_object_fn with its caller-supplied payload so both can
 * travel through the single `void *` argument of the loose/packed/bitmapped
 * iteration callbacks.
 */
struct for_each_object_payload {
896+
/* Callback invoked once per visited object. */
for_each_object_fn callback;
897+
/* Opaque pointer passed through unchanged as the callback's `data`. */
void *payload;
898+
};
899+
900+
/*
 * Adapter for for_each_loose_object(): forwards a loose object to the wrapped
 * for_each_object_fn.
 *
 * Loose objects have no pack, so the callback receives `pack == NULL` and
 * `offset == 0`. `_payload` must point to a `struct for_each_object_payload`.
 * Returns whatever the wrapped callback returns.
 */
static int batch_one_object_loose(const struct object_id *oid,
901+
const char *path UNUSED,
902+
void *_payload)
903+
{
904+
struct for_each_object_payload *payload = _payload;
905+
return payload->callback(oid, NULL, 0, payload->payload);
906+
}
907+
908+
/*
 * Adapter for the packed-object iterators: forwards a packed object to the
 * wrapped for_each_object_fn.
 *
 * The iterator supplies an index position `pos`; it is converted to an offset
 * via nth_packed_object_offset() before being handed to the callback.
 * `_payload` must point to a `struct for_each_object_payload`.
 * Returns whatever the wrapped callback returns.
 */
static int batch_one_object_packed(const struct object_id *oid,
909+
struct packed_git *pack,
910+
uint32_t pos,
911+
void *_payload)
912+
{
913+
struct for_each_object_payload *payload = _payload;
914+
return payload->callback(oid, pack, nth_packed_object_offset(pack, pos),
915+
payload->payload);
916+
}
917+
918+
/*
 * Adapter for for_each_bitmapped_object(): forwards a bitmapped object to the
 * wrapped for_each_object_fn.
 *
 * The type, flags and hash reported by the bitmap walk are ignored; only the
 * object ID, pack and offset are passed on. `_payload` must point to a
 * `struct for_each_object_payload`.
 * Returns whatever the wrapped callback returns.
 */
static int batch_one_object_bitmapped(const struct object_id *oid,
919+
enum object_type type UNUSED,
920+
int flags UNUSED,
921+
uint32_t hash UNUSED,
922+
struct packed_git *pack,
923+
off_t offset,
924+
void *_payload)
925+
{
926+
struct for_each_object_payload *payload = _payload;
927+
return payload->callback(oid, pack, offset, payload->payload);
928+
}
929+
930+
/*
 * Invoke `callback` for loose and packed objects of the repository, using
 * the bitmap index for the packed part when one is available.
 *
 *   opt      - batch options; only `opt->objects_filter` is read here and
 *              is handed to the bitmap walk.
 *   callback - function invoked per object.
 *   flags    - passed to the non-bitmap pack iterators
 *              (for_each_object_in_pack() / for_each_packed_object()); it is
 *              not passed to the bitmap walk.
 *   _payload - opaque pointer forwarded to `callback`.
 *
 * Return values of the iteration helpers are not propagated, apart from the
 * bitmap walk's result, which selects the fallback path below.
 */
static void batch_each_object(struct batch_options *opt,
931+
for_each_object_fn callback,
932+
unsigned flags,
933+
void *_payload)
934+
{
935+
struct for_each_object_payload payload = {
936+
.callback = callback,
937+
.payload = _payload,
938+
};
939+
/* May be NULL; in that case the plain packed-object walk below is used. */
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
940+
941+
/* Loose objects are always enumerated directly. */
for_each_loose_object(batch_one_object_loose, &payload, 0);
942+
943+
/*
 * A zero return from for_each_bitmapped_object() is treated as "the bitmap
 * walk handled its packs" (presumably success -- confirm in pack-bitmap.c).
 * Packs the bitmap does not cover still need to be walked the regular way.
 */
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
944+
batch_one_object_bitmapped, &payload)) {
945+
struct packed_git *pack;
946+
947+
for (pack = get_all_packs(the_repository); pack; pack = pack->next) {
948+
/* Skip packs already covered by the bitmap or whose index cannot be opened. */
if (bitmap_index_contains_pack(bitmap, pack) ||
949+
open_pack_index(pack))
950+
continue;
951+
for_each_object_in_pack(pack, batch_one_object_packed,
952+
&payload, flags);
953+
}
954+
} else {
955+
/* No bitmap, or the bitmap walk returned non-zero: walk every pack. */
for_each_packed_object(the_repository, batch_one_object_packed,
956+
&payload, flags);
957+
}
958+
959+
/* Called unconditionally, including when `bitmap` is NULL. */
free_bitmap_index(bitmap);
960+
}
961+
888962
static int batch_objects(struct batch_options *opt)
889963
{
890964
struct strbuf input = STRBUF_INIT;
@@ -921,7 +995,8 @@ static int batch_objects(struct batch_options *opt)
921995
struct object_cb_data cb;
922996
struct object_info empty = OBJECT_INFO_INIT;
923997

924-
if (!memcmp(&data.info, &empty, sizeof(empty)))
998+
if (!memcmp(&data.info, &empty, sizeof(empty)) &&
999+
opt->objects_filter.choice == LOFC_DISABLED)
9251000
data.skip_object_info = 1;
9261001

9271002
if (repo_has_promisor_remote(the_repository))
@@ -938,18 +1013,14 @@ static int batch_objects(struct batch_options *opt)
9381013

9391014
cb.seen = &seen;
9401015

941-
for_each_loose_object(batch_unordered_loose, &cb, 0);
942-
for_each_packed_object(the_repository, batch_unordered_packed,
943-
&cb, FOR_EACH_OBJECT_PACK_ORDER);
1016+
batch_each_object(opt, batch_unordered_object,
1017+
FOR_EACH_OBJECT_PACK_ORDER, &cb);
9441018

9451019
oidset_clear(&seen);
9461020
} else {
9471021
struct oid_array sa = OID_ARRAY_INIT;
9481022

949-
for_each_loose_object(collect_loose_object, &sa, 0);
950-
for_each_packed_object(the_repository, collect_packed_object,
951-
&sa, 0);
952-
1023+
batch_each_object(opt, collect_object, 0, &sa);
9531024
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
9541025

9551026
oid_array_clear(&sa);
@@ -1045,12 +1116,15 @@ int cmd_cat_file(int argc,
10451116
int opt_cw = 0;
10461117
int opt_epts = 0;
10471118
const char *exp_type = NULL, *obj_name = NULL;
1048-
struct batch_options batch = {0};
1119+
struct batch_options batch = {
1120+
.objects_filter = LIST_OBJECTS_FILTER_INIT,
1121+
};
10491122
int unknown_type = 0;
10501123
int input_nul_terminated = 0;
10511124
int nul_terminated = 0;
1125+
int ret;
10521126

1053-
const char * const usage[] = {
1127+
const char * const builtin_catfile_usage[] = {
10541128
N_("git cat-file <type> <object>"),
10551129
N_("git cat-file (-e | -p) <object>"),
10561130
N_("git cat-file (-t | -s) [--allow-unknown-type] <object>"),
@@ -1109,48 +1183,64 @@ int cmd_cat_file(int argc,
11091183
N_("run filters on object's content"), 'w'),
11101184
OPT_STRING(0, "path", &force_path, N_("blob|tree"),
11111185
N_("use a <path> for (--textconv | --filters); Not with 'batch'")),
1186+
OPT_CALLBACK(0, "filter", &batch.objects_filter, N_("args"),
1187+
N_("object filtering"), opt_parse_list_objects_filter),
11121188
OPT_END()
11131189
};
11141190

11151191
git_config(git_cat_file_config, NULL);
11161192

11171193
batch.buffer_output = -1;
11181194

1119-
argc = parse_options(argc, argv, prefix, options, usage, 0);
1195+
argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0);
11201196
opt_cw = (opt == 'c' || opt == 'w');
11211197
opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's');
11221198

11231199
if (use_mailmap)
11241200
read_mailmap(&mailmap);
11251201

1202+
switch (batch.objects_filter.choice) {
1203+
case LOFC_DISABLED:
1204+
break;
1205+
case LOFC_BLOB_NONE:
1206+
case LOFC_BLOB_LIMIT:
1207+
case LOFC_OBJECT_TYPE:
1208+
if (!batch.enabled)
1209+
usage(_("objects filter only supported in batch mode"));
1210+
break;
1211+
default:
1212+
usagef(_("objects filter not supported: '%s'"),
1213+
list_object_filter_config_name(batch.objects_filter.choice));
1214+
}
1215+
11261216
/* --batch-all-objects? */
11271217
if (opt == 'b')
11281218
batch.all_objects = 1;
11291219

11301220
/* Option compatibility */
11311221
if (force_path && !opt_cw)
11321222
usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"),
1133-
usage, options,
1223+
builtin_catfile_usage, options,
11341224
"--path", _("path|tree-ish"), "--filters",
11351225
"--textconv");
11361226

11371227
/* Option compatibility with batch mode */
11381228
if (batch.enabled)
11391229
;
11401230
else if (batch.follow_symlinks)
1141-
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
1231+
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, options,
11421232
"--follow-symlinks");
11431233
else if (batch.buffer_output >= 0)
1144-
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
1234+
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, options,
11451235
"--buffer");
11461236
else if (batch.all_objects)
1147-
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
1237+
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, options,
11481238
"--batch-all-objects");
11491239
else if (input_nul_terminated)
1150-
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
1240+
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, options,
11511241
"-z");
11521242
else if (nul_terminated)
1153-
usage_msg_optf(_("'%s' requires a batch mode"), usage, options,
1243+
usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, options,
11541244
"-Z");
11551245

11561246
batch.input_delim = batch.output_delim = '\n';
@@ -1172,39 +1262,45 @@ int cmd_cat_file(int argc,
11721262
batch.transform_mode = opt;
11731263
else if (opt && opt != 'b')
11741264
usage_msg_optf(_("'-%c' is incompatible with batch mode"),
1175-
usage, options, opt);
1265+
builtin_catfile_usage, options, opt);
11761266
else if (argc)
1177-
usage_msg_opt(_("batch modes take no arguments"), usage,
1267+
usage_msg_opt(_("batch modes take no arguments"), builtin_catfile_usage,
11781268
options);
11791269

1180-
return batch_objects(&batch);
1270+
ret = batch_objects(&batch);
1271+
goto out;
11811272
}
11821273

11831274
if (opt) {
11841275
if (!argc && opt == 'c')
11851276
usage_msg_optf(_("<rev> required with '%s'"),
1186-
usage, options, "--textconv");
1277+
builtin_catfile_usage, options, "--textconv");
11871278
else if (!argc && opt == 'w')
11881279
usage_msg_optf(_("<rev> required with '%s'"),
1189-
usage, options, "--filters");
1280+
builtin_catfile_usage, options, "--filters");
11901281
else if (!argc && opt_epts)
11911282
usage_msg_optf(_("<object> required with '-%c'"),
1192-
usage, options, opt);
1283+
builtin_catfile_usage, options, opt);
11931284
else if (argc == 1)
11941285
obj_name = argv[0];
11951286
else
1196-
usage_msg_opt(_("too many arguments"), usage, options);
1287+
usage_msg_opt(_("too many arguments"), builtin_catfile_usage, options);
11971288
} else if (!argc) {
1198-
usage_with_options(usage, options);
1289+
usage_with_options(builtin_catfile_usage, options);
11991290
} else if (argc != 2) {
12001291
usage_msg_optf(_("only two arguments allowed in <type> <object> mode, not %d"),
1201-
usage, options, argc);
1292+
builtin_catfile_usage, options, argc);
12021293
} else if (argc) {
12031294
exp_type = argv[0];
12041295
obj_name = argv[1];
12051296
}
12061297

12071298
if (unknown_type && opt != 't' && opt != 's')
12081299
die("git cat-file --allow-unknown-type: use with -s or -t");
1209-
return cat_one_file(opt, exp_type, obj_name, unknown_type);
1300+
1301+
ret = cat_one_file(opt, exp_type, obj_name, unknown_type);
1302+
1303+
out:
1304+
list_objects_filter_release(&batch.objects_filter);
1305+
return ret;
12101306
}

0 commit comments

Comments
 (0)